Coverage Report

Created: 2025-12-14 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
#if defined(_MSC_VER)
57
#define m512bh(p) p
58
#define m512i(p) p
59
#else
60
#define m512bh(p) (__m512bh)(p)
61
#define m512i(p) (__m512i)(p)
62
#endif
63
64
#if defined(__linux__) || \
65
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
66
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
67
68
#include <unistd.h>
69
#include <sys/types.h>
70
#include <sys/stat.h>
71
#include <sys/wait.h>
72
#if defined(__linux__)
73
#include <sys/prctl.h>
74
#endif
75
76
#if defined(__ANDROID__)
77
#include <unwind.h>
78
#include <dlfcn.h>
79
#include <stdio.h>
80
81
struct backtrace_state {
82
    void ** current;
83
    void ** end;
84
};
85
86
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
87
    struct backtrace_state * state = (struct backtrace_state *)arg;
88
    uintptr_t pc = _Unwind_GetIP(context);
89
    if (pc) {
90
        if (state->current == state->end) {
91
            return _URC_END_OF_STACK;
92
        } else {
93
            *state->current++ = (void*)pc;
94
        }
95
    }
96
    return _URC_NO_REASON;
97
}
98
99
static void ggml_print_backtrace_symbols(void) {
100
    const int max = 100;
101
    void* buffer[max];
102
103
    struct backtrace_state state = {buffer, buffer + max};
104
    _Unwind_Backtrace(unwind_callback, &state);
105
106
    int count = state.current - buffer;
107
108
    for (int idx = 0; idx < count; ++idx) {
109
        const void * addr = buffer[idx];
110
        const char * symbol = "";
111
112
        Dl_info info;
113
        if (dladdr(addr, &info) && info.dli_sname) {
114
            symbol = info.dli_sname;
115
        }
116
117
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
118
    }
119
}
120
#elif defined(__linux__) && defined(__GLIBC__)
121
#include <execinfo.h>
122
0
static void ggml_print_backtrace_symbols(void) {
123
0
    void * trace[100];
124
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
125
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
126
0
}
127
#elif defined(__APPLE__)
128
#include <execinfo.h>
129
static void ggml_print_backtrace_symbols(void) {
130
    void * trace[100];
131
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
132
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
133
}
134
#else
135
static void ggml_print_backtrace_symbols(void) {
136
    // platform not supported
137
}
138
#endif
139
140
0
void ggml_print_backtrace(void) {
141
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
142
0
    if (GGML_NO_BACKTRACE) {
143
0
        return;
144
0
    }
145
#if defined(__APPLE__)
146
    // On macOS, fork+debugger attachment is problematic due to:
147
    // 1. libdispatch "poisons" forked child processes
148
    // 2. lldb has issues attaching to parent from forked child
149
    // Use simple backtrace() instead to avoid Terminal.app crashes
150
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
151
    if (!GGML_BACKTRACE_LLDB) {
152
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
153
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
154
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
155
        ggml_print_backtrace_symbols();
156
        return;
157
    }
158
#endif
159
0
#if defined(__linux__)
160
0
    FILE * f = fopen("/proc/self/status", "r");
161
0
    size_t size = 0;
162
0
    char * line = NULL;
163
0
    ssize_t length = 0;
164
0
    while ((length = getline(&line, &size, f)) > 0) {
165
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
166
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
167
            // Already being debugged, and the breakpoint is the later abort()
168
0
            free(line);
169
0
            fclose(f);
170
0
            return;
171
0
        }
172
0
    }
173
0
    free(line);
174
0
    fclose(f);
175
0
    int lock[2] = { -1, -1 };
176
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
177
0
#endif
178
0
    const int parent_pid = getpid();
179
0
    const int child_pid = fork();
180
0
    if (child_pid < 0) { // error
181
0
#if defined(__linux__)
182
0
        close(lock[1]);
183
0
        close(lock[0]);
184
0
#endif
185
0
        return;
186
0
    } else if (child_pid == 0) { // child
187
0
        char attach[32];
188
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
189
0
#if defined(__linux__)
190
0
        close(lock[1]);
191
0
        (void) !read(lock[0], lock, 1);
192
0
        close(lock[0]);
193
0
#endif
194
        // try gdb
195
0
        execlp("gdb", "gdb", "--batch",
196
0
            "-ex", "set style enabled on",
197
0
            "-ex", attach,
198
0
            "-ex", "bt -frame-info source-and-location",
199
0
            "-ex", "detach",
200
0
            "-ex", "quit",
201
0
            (char *) NULL);
202
        // try lldb
203
0
        execlp("lldb", "lldb", "--batch",
204
0
            "-o", "bt",
205
0
            "-o", "quit",
206
0
            "-p", &attach[sizeof("attach ") - 1],
207
0
            (char *) NULL);
208
        // gdb failed, fallback to backtrace_symbols
209
0
        ggml_print_backtrace_symbols();
210
0
        _Exit(0);
211
0
    } else { // parent
212
0
#if defined(__linux__)
213
0
        prctl(PR_SET_PTRACER, child_pid);
214
0
        close(lock[1]);
215
0
        close(lock[0]);
216
0
#endif
217
0
        waitpid(child_pid, NULL, 0);
218
0
    }
219
0
}
220
#else
221
void ggml_print_backtrace(void) {
222
    // platform not supported
223
}
224
#endif
225
226
static ggml_abort_callback_t g_abort_callback = NULL;
227
228
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
229
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
230
0
    ggml_abort_callback_t ret_val = g_abort_callback;
231
0
    g_abort_callback = callback;
232
0
    return ret_val;
233
0
}
234
235
70
void ggml_abort(const char * file, int line, const char * fmt, ...) {
236
70
    fflush(stdout);
237
238
70
    char message[2048];
239
70
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
240
241
70
    va_list args;
242
70
    va_start(args, fmt);
243
70
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
244
70
    va_end(args);
245
246
70
    if (g_abort_callback) {
247
0
        g_abort_callback(message);
248
70
    } else {
249
        // default: print error and backtrace to stderr
250
70
        fprintf(stderr, "%s\n", message);
251
        
252
70
    }
253
254
70
    abort();
255
70
}
256
257
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
258
259
//
260
// logging
261
//
262
263
struct ggml_logger_state {
264
    ggml_log_callback log_callback;
265
    void * log_callback_user_data;
266
};
267
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
268
269
1.03k
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
270
1.03k
    if (format == NULL) {
271
0
        return;
272
0
    }
273
1.03k
    va_list args_copy;
274
1.03k
    va_copy(args_copy, args);
275
1.03k
    char buffer[128];
276
1.03k
    int len = vsnprintf(buffer, 128, format, args);
277
1.03k
    if (len < 128) {
278
1.01k
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
279
1.01k
    } else {
280
17
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
281
17
        vsnprintf(buffer2, len + 1, format, args_copy);
282
17
        buffer2[len] = 0;
283
17
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
284
17
        free(buffer2);
285
17
    }
286
1.03k
    va_end(args_copy);
287
1.03k
}
288
289
1.03k
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
290
1.03k
    va_list args;
291
1.03k
    va_start(args, format);
292
1.03k
    ggml_log_internal_v(level, format, args);
293
1.03k
    va_end(args);
294
1.03k
}
295
296
1.03k
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
297
1.03k
    (void) level;
298
1.03k
    (void) user_data;
299
1.03k
    fputs(text, stderr);
300
1.03k
    fflush(stderr);
301
1.03k
}
302
303
//
304
// end of logging block
305
//
306
307
#ifdef GGML_USE_ACCELERATE
308
// uncomment to use vDSP for soft max computation
309
// note: not sure if it is actually faster
310
//#define GGML_SOFT_MAX_ACCELERATE
311
#endif
312
313
314
1.36k
void * ggml_aligned_malloc(size_t size) {
315
#if defined(__s390x__)
316
    const int alignment = 256;
317
#else
318
1.36k
    const int alignment = 64;
319
1.36k
#endif
320
321
#if defined(_MSC_VER) || defined(__MINGW32__)
322
    return _aligned_malloc(size, alignment);
323
#else
324
1.36k
    if (size == 0) {
325
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
326
0
        return NULL;
327
0
    }
328
1.36k
    void * aligned_memory = NULL;
329
  #ifdef GGML_USE_CPU_HBM
330
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
331
  #elif TARGET_OS_OSX
332
    GGML_UNUSED(alignment);
333
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
334
    int result = EFAULT;
335
    switch (alloc_status) {
336
        case KERN_SUCCESS:
337
            result = 0;
338
            break;
339
        case KERN_INVALID_ADDRESS:
340
            result = EINVAL;
341
            break;
342
        case KERN_NO_SPACE:
343
            result = ENOMEM;
344
            break;
345
        default:
346
            result = EFAULT;
347
            break;
348
    }
349
  #else
350
1.36k
    int result = posix_memalign(&aligned_memory, alignment, size);
351
1.36k
  #endif
352
1.36k
    if (result != 0) {
353
        // Handle allocation failure
354
0
        const char *error_desc = "unknown allocation error";
355
0
        switch (result) {
356
0
            case EINVAL:
357
0
                error_desc = "invalid alignment value";
358
0
                break;
359
0
            case ENOMEM:
360
0
                error_desc = "insufficient memory";
361
0
                break;
362
0
        }
363
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
364
0
        return NULL;
365
0
    }
366
1.36k
    return aligned_memory;
367
1.36k
#endif
368
1.36k
}
369
370
1.36k
void ggml_aligned_free(void * ptr, size_t size) {
371
1.36k
    GGML_UNUSED(size);
372
#if defined(_MSC_VER) || defined(__MINGW32__)
373
    _aligned_free(ptr);
374
#elif GGML_USE_CPU_HBM
375
    if (ptr != NULL) {
376
        hbw_free(ptr);
377
    }
378
#elif TARGET_OS_OSX
379
    if (ptr != NULL) {
380
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
381
    }
382
#else
383
1.36k
    free(ptr);
384
1.36k
#endif
385
1.36k
}
386
387
388
1.36k
inline static void * ggml_malloc(size_t size) {
389
1.36k
    if (size == 0) {
390
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
391
0
        return NULL;
392
0
    }
393
1.36k
    void * result = malloc(size);
394
1.36k
    if (result == NULL) {
395
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
396
0
        GGML_ABORT("fatal error");
397
0
    }
398
1.36k
    return result;
399
1.36k
}
400
401
// calloc
402
0
inline static void * ggml_calloc(size_t num, size_t size) {
403
0
if ((num * size) > 9000000) {GGML_ABORT("calloc err");}
404
405
0
    if (num == 0 || size == 0) {
406
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
407
0
        return NULL;
408
0
    }
409
0
    void * result = calloc(num, size);
410
0
    if (result == NULL) {
411
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
412
0
        GGML_ABORT("fatal error");
413
0
    }
414
0
    return result;
415
0
}
416
417
1.36k
#define GGML_MALLOC(size)      ggml_malloc(size)
418
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
419
420
1.36k
#define GGML_FREE(ptr) free(ptr)
421
422
0
const char * ggml_status_to_string(enum ggml_status status) {
423
0
    switch (status) {
424
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
425
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
426
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
427
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
428
0
    }
429
430
0
    return "GGML status: unknown";
431
0
}
432
433
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
434
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
435
0
    return GGML_FP16_TO_FP32(x);
436
0
}
437
438
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
439
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
440
0
    return GGML_FP32_TO_FP16(x);
441
0
}
442
443
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
444
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
445
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
446
0
}
447
448
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
449
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
450
0
    return GGML_FP32_TO_BF16(x);
451
0
}
452
453
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
454
0
    for (int64_t i = 0; i < n; i++) {
455
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
456
0
    }
457
0
}
458
459
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
460
0
    int i = 0;
461
0
    for (; i < n; ++i) {
462
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
463
0
    }
464
0
}
465
466
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
467
0
    int i = 0;
468
0
    for (; i < n; ++i) {
469
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
470
0
    }
471
0
}
472
473
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
474
0
    for (int i = 0; i < n; i++) {
475
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
476
0
    }
477
0
}
478
479
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
480
0
  int i = 0;
481
#if defined(__AVX512BF16__)
482
  // subnormals are flushed to zero on this platform
483
  for (; i + 32 <= n; i += 32) {
484
        _mm512_storeu_si512(
485
            (__m512i *)(y + i),
486
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
487
                                _mm512_loadu_ps(x + i))));
488
  }
489
#endif
490
0
    for (; i < n; i++) {
491
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
492
0
    }
493
0
}
494
495
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
496
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
497
0
}
498
499
0
const char * ggml_version(void) {
500
0
    return GGML_VERSION;
501
0
}
502
503
0
const char * ggml_commit(void) {
504
0
    return GGML_COMMIT;
505
0
}
506
507
//
508
// timing
509
//
510
511
#if defined(_MSC_VER) || defined(__MINGW32__)
512
static int64_t timer_freq, timer_start;
513
void ggml_time_init(void) {
514
    LARGE_INTEGER t;
515
    QueryPerformanceFrequency(&t);
516
    timer_freq = t.QuadPart;
517
518
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
519
    // and the uptime is high enough.
520
    // We subtract the program start time to reduce the likelihood of that happening.
521
    QueryPerformanceCounter(&t);
522
    timer_start = t.QuadPart;
523
}
524
int64_t ggml_time_ms(void) {
525
    LARGE_INTEGER t;
526
    QueryPerformanceCounter(&t);
527
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
528
}
529
int64_t ggml_time_us(void) {
530
    LARGE_INTEGER t;
531
    QueryPerformanceCounter(&t);
532
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
533
}
534
#else
535
3.46k
void ggml_time_init(void) {}
536
0
int64_t ggml_time_ms(void) {
537
0
    struct timespec ts;
538
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
539
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
540
0
}
541
542
2.00k
int64_t ggml_time_us(void) {
543
2.00k
    struct timespec ts;
544
2.00k
    clock_gettime(CLOCK_MONOTONIC, &ts);
545
2.00k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
546
2.00k
}
547
#endif
548
549
0
int64_t ggml_cycles(void) {
550
0
    return clock();
551
0
}
552
553
0
int64_t ggml_cycles_per_ms(void) {
554
0
    return CLOCKS_PER_SEC/1000;
555
0
}
556
557
//
558
// cross-platform UTF-8 file paths
559
//
560
561
#ifdef _WIN32
562
static wchar_t * ggml_mbstowcs(const char * mbs) {
563
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
564
    if (!wlen) {
565
        errno = EINVAL;
566
        return NULL;
567
    }
568
569
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
570
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
571
    if (!wlen) {
572
        GGML_FREE(wbuf);
573
        errno = EINVAL;
574
        return NULL;
575
    }
576
577
    return wbuf;
578
}
579
#endif
580
581
1.35k
FILE * ggml_fopen(const char * fname, const char * mode) {
582
#ifdef _WIN32
583
    FILE * file = NULL;
584
585
    // convert fname (UTF-8)
586
    wchar_t * wfname = ggml_mbstowcs(fname);
587
    if (wfname) {
588
        // convert mode (ANSI)
589
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
590
        wchar_t * wmode_p = wmode;
591
        do {
592
            *wmode_p++ = (wchar_t)*mode;
593
        } while (*mode++);
594
595
        // open file
596
        file = _wfopen(wfname, wmode);
597
598
        GGML_FREE(wfname);
599
        GGML_FREE(wmode);
600
    }
601
602
    return file;
603
#else
604
1.35k
    return fopen(fname, mode);
605
1.35k
#endif
606
607
1.35k
}
608
609
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
610
    [GGML_TYPE_I8] = {
611
        .type_name                = "i8",
612
        .blck_size                = 1,
613
        .type_size                = sizeof(int8_t),
614
        .is_quantized             = false,
615
    },
616
    [GGML_TYPE_I16] = {
617
        .type_name                = "i16",
618
        .blck_size                = 1,
619
        .type_size                = sizeof(int16_t),
620
        .is_quantized             = false,
621
    },
622
    [GGML_TYPE_I32] = {
623
        .type_name                = "i32",
624
        .blck_size                = 1,
625
        .type_size                = sizeof(int32_t),
626
        .is_quantized             = false,
627
    },
628
    [GGML_TYPE_I64] = {
629
        .type_name                = "i64",
630
        .blck_size                = 1,
631
        .type_size                = sizeof(int64_t),
632
        .is_quantized             = false,
633
    },
634
    [GGML_TYPE_F64] = {
635
        .type_name                = "f64",
636
        .blck_size                = 1,
637
        .type_size                = sizeof(double),
638
        .is_quantized             = false,
639
    },
640
    [GGML_TYPE_F32] = {
641
        .type_name                = "f32",
642
        .blck_size                = 1,
643
        .type_size                = sizeof(float),
644
        .is_quantized             = false,
645
    },
646
    [GGML_TYPE_F16] = {
647
        .type_name                = "f16",
648
        .blck_size                = 1,
649
        .type_size                = sizeof(ggml_fp16_t),
650
        .is_quantized             = false,
651
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
652
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
653
    },
654
    [GGML_TYPE_Q4_0] = {
655
        .type_name                = "q4_0",
656
        .blck_size                = QK4_0,
657
        .type_size                = sizeof(block_q4_0),
658
        .is_quantized             = true,
659
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
660
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
661
    },
662
    [GGML_TYPE_Q4_1] = {
663
        .type_name                = "q4_1",
664
        .blck_size                = QK4_1,
665
        .type_size                = sizeof(block_q4_1),
666
        .is_quantized             = true,
667
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
668
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
669
    },
670
    [4] = { // GGML_TYPE_Q4_2
671
        .type_name                = "DEPRECATED",
672
        .blck_size                = 0,
673
        .type_size                = 0,
674
        .is_quantized             = false,
675
    },
676
    [5] = { // GGML_TYPE_Q4_3
677
        .type_name                = "DEPRECATED",
678
        .blck_size                = 0,
679
        .type_size                = 0,
680
        .is_quantized             = false,
681
    },
682
    [GGML_TYPE_Q5_0] = {
683
        .type_name                = "q5_0",
684
        .blck_size                = QK5_0,
685
        .type_size                = sizeof(block_q5_0),
686
        .is_quantized             = true,
687
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
688
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
689
    },
690
    [GGML_TYPE_Q5_1] = {
691
        .type_name                = "q5_1",
692
        .blck_size                = QK5_1,
693
        .type_size                = sizeof(block_q5_1),
694
        .is_quantized             = true,
695
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
696
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
697
    },
698
    [GGML_TYPE_Q8_0] = {
699
        .type_name                = "q8_0",
700
        .blck_size                = QK8_0,
701
        .type_size                = sizeof(block_q8_0),
702
        .is_quantized             = true,
703
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
704
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
705
    },
706
    [GGML_TYPE_Q8_1] = {
707
        .type_name                = "q8_1",
708
        .blck_size                = QK8_1,
709
        .type_size                = sizeof(block_q8_1),
710
        .is_quantized             = true,
711
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
712
    },
713
    [GGML_TYPE_MXFP4] = {
714
        .type_name                = "mxfp4",
715
        .blck_size                = QK_MXFP4,
716
        .type_size                = sizeof(block_mxfp4),
717
        .is_quantized             = true,
718
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
719
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
720
    },
721
    [GGML_TYPE_Q2_K] = {
722
        .type_name                = "q2_K",
723
        .blck_size                = QK_K,
724
        .type_size                = sizeof(block_q2_K),
725
        .is_quantized             = true,
726
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
727
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
728
    },
729
    [GGML_TYPE_Q3_K] = {
730
        .type_name                = "q3_K",
731
        .blck_size                = QK_K,
732
        .type_size                = sizeof(block_q3_K),
733
        .is_quantized             = true,
734
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
735
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
736
    },
737
    [GGML_TYPE_Q4_K] = {
738
        .type_name                = "q4_K",
739
        .blck_size                = QK_K,
740
        .type_size                = sizeof(block_q4_K),
741
        .is_quantized             = true,
742
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
743
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
744
    },
745
    [GGML_TYPE_Q5_K] = {
746
        .type_name                = "q5_K",
747
        .blck_size                = QK_K,
748
        .type_size                = sizeof(block_q5_K),
749
        .is_quantized             = true,
750
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
751
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
752
    },
753
    [GGML_TYPE_Q6_K] = {
754
        .type_name                = "q6_K",
755
        .blck_size                = QK_K,
756
        .type_size                = sizeof(block_q6_K),
757
        .is_quantized             = true,
758
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
759
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
760
    },
761
    [GGML_TYPE_IQ2_XXS] = {
762
        .type_name                = "iq2_xxs",
763
        .blck_size                = QK_K,
764
        .type_size                = sizeof(block_iq2_xxs),
765
        .is_quantized             = true,
766
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
767
        .from_float_ref           = NULL,
768
    },
769
    [GGML_TYPE_IQ2_XS] = {
770
        .type_name                = "iq2_xs",
771
        .blck_size                = QK_K,
772
        .type_size                = sizeof(block_iq2_xs),
773
        .is_quantized             = true,
774
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
775
        .from_float_ref           = NULL,
776
    },
777
    [GGML_TYPE_IQ3_XXS] = {
778
        .type_name                = "iq3_xxs",
779
        .blck_size                = QK_K,
780
        .type_size                = sizeof(block_iq3_xxs),
781
        .is_quantized             = true,
782
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
783
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
784
    },
785
    [GGML_TYPE_IQ3_S] = {
786
        .type_name                = "iq3_s",
787
        .blck_size                = QK_K,
788
        .type_size                = sizeof(block_iq3_s),
789
        .is_quantized             = true,
790
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
791
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
792
    },
793
    [GGML_TYPE_IQ2_S] = {
794
        .type_name                = "iq2_s",
795
        .blck_size                = QK_K,
796
        .type_size                = sizeof(block_iq2_s),
797
        .is_quantized             = true,
798
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
799
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
800
    },
801
    [GGML_TYPE_IQ1_S] = {
802
        .type_name                = "iq1_s",
803
        .blck_size                = QK_K,
804
        .type_size                = sizeof(block_iq1_s),
805
        .is_quantized             = true,
806
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
807
        .from_float_ref           = NULL,
808
    },
809
    [GGML_TYPE_IQ1_M] = {
810
        .type_name                = "iq1_m",
811
        .blck_size                = QK_K,
812
        .type_size                = sizeof(block_iq1_m),
813
        .is_quantized             = true,
814
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
815
        .from_float_ref           = NULL,
816
    },
817
    [GGML_TYPE_IQ4_NL] = {
818
        .type_name                = "iq4_nl",
819
        .blck_size                = QK4_NL,
820
        .type_size                = sizeof(block_iq4_nl),
821
        .is_quantized             = true,
822
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
823
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
824
    },
825
    [GGML_TYPE_IQ4_XS] = {
826
        .type_name                = "iq4_xs",
827
        .blck_size                = QK_K,
828
        .type_size                = sizeof(block_iq4_xs),
829
        .is_quantized             = true,
830
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
831
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
832
    },
833
    [GGML_TYPE_Q8_K] = {
834
        .type_name                = "q8_K",
835
        .blck_size                = QK_K,
836
        .type_size                = sizeof(block_q8_K),
837
        .is_quantized             = true,
838
    },
839
    [GGML_TYPE_BF16] = {
840
        .type_name                = "bf16",
841
        .blck_size                = 1,
842
        .type_size                = sizeof(ggml_bf16_t),
843
        .is_quantized             = false,
844
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
845
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
846
    },
847
    [31] = { // GGML_TYPE_Q4_0_4_4
848
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
849
        .blck_size                = 0,
850
        .type_size                = 0,
851
        .is_quantized             = false,
852
    },
853
    [32] = { // GGML_TYPE_Q4_0_4_8
854
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
855
        .blck_size                = 0,
856
        .type_size                = 0,
857
        .is_quantized             = false,
858
    },
859
    [33] = { // GGML_TYPE_Q4_0_8_8
860
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
861
        .blck_size                = 0,
862
        .type_size                = 0,
863
        .is_quantized             = false,
864
    },
865
    [GGML_TYPE_TQ1_0] = {
866
        .type_name                = "tq1_0",
867
        .blck_size                = QK_K,
868
        .type_size                = sizeof(block_tq1_0),
869
        .is_quantized             = true,
870
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
871
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
872
    },
873
    [GGML_TYPE_TQ2_0] = {
874
        .type_name                = "tq2_0",
875
        .blck_size                = QK_K,
876
        .type_size                = sizeof(block_tq2_0),
877
        .is_quantized             = true,
878
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
879
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
880
    },
881
    [36] = { // GGML_TYPE_IQ4_NL_4_4
882
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
883
        .blck_size                = 0,
884
        .type_size                = 0,
885
        .is_quantized             = false,
886
    },
887
    [37] = { // GGML_TYPE_IQ4_NL_4_8
888
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
889
        .blck_size                = 0,
890
        .type_size                = 0,
891
        .is_quantized             = false,
892
    },
893
    [38] = { // GGML_TYPE_IQ4_NL_8_8
894
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
895
        .blck_size                = 0,
896
        .type_size                = 0,
897
        .is_quantized             = false,
898
    },
899
};
900
901
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
902
0
    GGML_ASSERT(type < GGML_TYPE_COUNT);
903
0
    return &type_traits[type];
904
0
}
905
906
//
907
// ggml object
908
//
909
910
struct ggml_object {
911
    size_t offs;
912
    size_t size;
913
914
    struct ggml_object * next;
915
916
    enum ggml_object_type type;
917
918
    char padding[4];
919
};
920
921
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
922
923
//
924
// ggml context
925
//
926
927
struct ggml_context {
928
    size_t mem_size;
929
    void * mem_buffer;
930
    bool   mem_buffer_owned;
931
    bool   no_alloc;
932
933
    int    n_objects;
934
935
    struct ggml_object * objects_begin;
936
    struct ggml_object * objects_end;
937
};
938
939
//
940
// data types
941
//
942
943
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
944
    "NONE",
945
946
    "DUP",
947
    "ADD",
948
    "ADD_ID",
949
    "ADD1",
950
    "ACC",
951
    "SUB",
952
    "MUL",
953
    "DIV",
954
    "SQR",
955
    "SQRT",
956
    "LOG",
957
    "SIN",
958
    "COS",
959
    "SUM",
960
    "SUM_ROWS",
961
    "CUMSUM",
962
    "MEAN",
963
    "ARGMAX",
964
    "COUNT_EQUAL",
965
    "REPEAT",
966
    "REPEAT_BACK",
967
    "CONCAT",
968
    "SILU_BACK",
969
    "NORM",
970
    "RMS_NORM",
971
    "RMS_NORM_BACK",
972
    "GROUP_NORM",
973
    "L2_NORM",
974
975
    "MUL_MAT",
976
    "MUL_MAT_ID",
977
    "OUT_PROD",
978
979
    "SCALE",
980
    "SET",
981
    "CPY",
982
    "CONT",
983
    "RESHAPE",
984
    "VIEW",
985
    "PERMUTE",
986
    "TRANSPOSE",
987
    "GET_ROWS",
988
    "GET_ROWS_BACK",
989
    "SET_ROWS",
990
    "DIAG",
991
    "DIAG_MASK_INF",
992
    "DIAG_MASK_ZERO",
993
    "SOFT_MAX",
994
    "SOFT_MAX_BACK",
995
    "ROPE",
996
    "ROPE_BACK",
997
    "CLAMP",
998
    "CONV_TRANSPOSE_1D",
999
    "IM2COL",
1000
    "IM2COL_BACK",
1001
    "IM2COL_3D",
1002
    "CONV_2D",
1003
    "CONV_3D",
1004
    "CONV_2D_DW",
1005
    "CONV_TRANSPOSE_2D",
1006
    "POOL_1D",
1007
    "POOL_2D",
1008
    "POOL_2D_BACK",
1009
    "UPSCALE",
1010
    "PAD",
1011
    "PAD_REFLECT_1D",
1012
    "ROLL",
1013
    "ARANGE",
1014
    "TIMESTEP_EMBEDDING",
1015
    "ARGSORT",
1016
    "TOP_K",
1017
    "LEAKY_RELU",
1018
    "TRI",
1019
    "FILL",
1020
1021
    "FLASH_ATTN_EXT",
1022
    "FLASH_ATTN_BACK",
1023
    "SSM_CONV",
1024
    "SSM_SCAN",
1025
    "WIN_PART",
1026
    "WIN_UNPART",
1027
    "GET_REL_POS",
1028
    "ADD_REL_POS",
1029
    "RWKV_WKV6",
1030
    "GATED_LINEAR_ATTN",
1031
    "RWKV_WKV7",
1032
    "SOLVE_TRI",
1033
1034
    "UNARY",
1035
1036
    "MAP_CUSTOM1",
1037
    "MAP_CUSTOM2",
1038
    "MAP_CUSTOM3",
1039
1040
    "CUSTOM",
1041
1042
    "CROSS_ENTROPY_LOSS",
1043
    "CROSS_ENTROPY_LOSS_BACK",
1044
    "OPT_STEP_ADAMW",
1045
    "OPT_STEP_SGD",
1046
1047
    "GLU",
1048
};
1049
1050
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1051
1052
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1053
    "none",
1054
1055
    "x",
1056
    "x+y",
1057
    "x[i]+y",
1058
    "x+y",
1059
    "view(x,nb,offset)+=y->x",
1060
    "x-y",
1061
    "x*y",
1062
    "x/y",
1063
    "x^2",
1064
    "√x",
1065
    "log(x)",
1066
    "sin(x)",
1067
    "cos(x)",
1068
    "Σx",
1069
    "Σx_k",
1070
    "cumsum(x)",
1071
    "Σx/n",
1072
    "argmax(x)",
1073
    "count_equal(x)",
1074
    "repeat(x)",
1075
    "repeat_back(x)",
1076
    "concat(x, y)",
1077
    "silu_back(x)",
1078
    "norm(x)",
1079
    "rms_norm(x)",
1080
    "rms_norm_back(x)",
1081
    "group_norm(x)",
1082
    "l2_norm(x)",
1083
1084
    "X*Y",
1085
    "X[i]*Y",
1086
    "X*Y",
1087
1088
    "x*v",
1089
    "y-\\>view(x)",
1090
    "x-\\>y",
1091
    "cont(x)",
1092
    "reshape(x)",
1093
    "view(x)",
1094
    "permute(x)",
1095
    "transpose(x)",
1096
    "get_rows(x)",
1097
    "get_rows_back(x)",
1098
    "set_rows(x)",
1099
    "diag(x)",
1100
    "diag_mask_inf(x)",
1101
    "diag_mask_zero(x)",
1102
    "soft_max(x)",
1103
    "soft_max_back(x)",
1104
    "rope(x)",
1105
    "rope_back(x)",
1106
    "clamp(x)",
1107
    "conv_transpose_1d(x)",
1108
    "im2col(x)",
1109
    "im2col_back(x)",
1110
    "im2col_3d(x)",
1111
    "conv_2d(x)",
1112
    "conv_3d(x)",
1113
    "conv_2d_dw(x)",
1114
    "conv_transpose_2d(x)",
1115
    "pool_1d(x)",
1116
    "pool_2d(x)",
1117
    "pool_2d_back(x)",
1118
    "upscale(x)",
1119
    "pad(x)",
1120
    "pad_reflect_1d(x)",
1121
    "roll(x)",
1122
    "arange(start, stop, step)",
1123
    "timestep_embedding(timesteps, dim, max_period)",
1124
    "argsort(x)",
1125
    "top_k(x)",
1126
    "leaky_relu(x)",
1127
    "tri(x)",
1128
    "fill(x, c)",
1129
1130
    "flash_attn_ext(x)",
1131
    "flash_attn_back(x)",
1132
    "ssm_conv(x)",
1133
    "ssm_scan(x)",
1134
    "win_part(x)",
1135
    "win_unpart(x)",
1136
    "get_rel_pos(x)",
1137
    "add_rel_pos(x)",
1138
    "rwkv_wkv6(k, v, r, tf, td, s)",
1139
    "gated_linear_attn(k, v, q, gate, s)",
1140
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1141
    "A X = B, A triangular, solve X",
1142
1143
    "unary(x)",
1144
1145
    "map_custom(x)",
1146
    "map_custom(x,y)",
1147
    "map_custom(x,y,z)",
1148
1149
    "custom(x)",
1150
1151
    "cross_entropy_loss(x,y)",
1152
    "cross_entropy_loss_back(x,y)",
1153
    "adamw(x)",
1154
    "sgd(x)",
1155
1156
    "glu(x)",
1157
};
1158
1159
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1160
1161
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1162
1163
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1164
    "ABS",
1165
    "SGN",
1166
    "NEG",
1167
    "STEP",
1168
    "TANH",
1169
    "ELU",
1170
    "RELU",
1171
    "SIGMOID",
1172
    "GELU",
1173
    "GELU_QUICK",
1174
    "SILU",
1175
    "HARDSWISH",
1176
    "HARDSIGMOID",
1177
    "EXP",
1178
    "EXPM1",
1179
    "SOFTPLUS",
1180
    "GELU_ERF",
1181
    "XIELU",
1182
    "FLOOR",
1183
    "CEIL",
1184
    "ROUND",
1185
    "TRUNC",
1186
};
1187
1188
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1189
1190
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1191
    "REGLU",
1192
    "GEGLU",
1193
    "SWIGLU",
1194
    "SWIGLU_OAI",
1195
    "GEGLU_ERF",
1196
    "GEGLU_QUICK",
1197
};
1198
1199
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1200
1201
1202
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1203
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1204
1205
1206
////////////////////////////////////////////////////////////////////////////////
1207
1208
0
void ggml_print_object(const struct ggml_object * obj) {
1209
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1210
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1211
0
}
1212
1213
0
void ggml_print_objects(const struct ggml_context * ctx) {
1214
0
    struct ggml_object * obj = ctx->objects_begin;
1215
1216
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1217
1218
0
    while (obj != NULL) {
1219
0
        ggml_print_object(obj);
1220
0
        obj = obj->next;
1221
0
    }
1222
1223
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1224
0
}
1225
1226
777
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1227
777
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1228
1229
777
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1230
777
}
1231
1232
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1233
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1234
1235
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1236
0
}
1237
1238
3.38k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1239
16.9k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1240
13.5k
        if (tensor->ne[i] <= 0) {
1241
2
            return 0;
1242
2
        }
1243
13.5k
    }
1244
1245
3.38k
    size_t nbytes;
1246
3.38k
    const size_t blck_size = ggml_blck_size(tensor->type);
1247
3.38k
    if (blck_size == 1) {
1248
3.38k
        nbytes = ggml_type_size(tensor->type);
1249
16.9k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1250
13.5k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1251
13.5k
        }
1252
3.38k
    }
1253
1
    else {
1254
1
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1255
4
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1256
3
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1257
3
        }
1258
1
    }
1259
1260
3.38k
    return nbytes;
1261
3.38k
}
1262
1263
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1264
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1265
0
}
1266
1267
6.57k
int64_t ggml_blck_size(enum ggml_type type) {
1268
6.57k
    return type_traits[type].blck_size;
1269
6.57k
}
1270
1271
6.57k
size_t ggml_type_size(enum ggml_type type) {
1272
6.57k
    return type_traits[type].type_size;
1273
6.57k
}
1274
1275
967
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1276
967
    assert(ne % ggml_blck_size(type) == 0);
1277
967
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1278
967
}
1279
1280
0
double ggml_type_sizef(enum ggml_type type) {
1281
0
    return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
1282
0
}
1283
1284
150
const char * ggml_type_name(enum ggml_type type) {
1285
150
    return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
1286
150
}
1287
1288
0
bool ggml_is_quantized(enum ggml_type type) {
1289
0
    return type_traits[type].is_quantized;
1290
0
}
1291
1292
0
const char * ggml_op_name(enum ggml_op op) {
1293
0
    return GGML_OP_NAME[op];
1294
0
}
1295
1296
0
const char * ggml_op_symbol(enum ggml_op op) {
1297
0
    return GGML_OP_SYMBOL[op];
1298
0
}
1299
1300
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1301
0
    return GGML_UNARY_OP_NAME[op];
1302
0
}
1303
1304
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1305
0
    return GGML_GLU_OP_NAME[op];
1306
0
}
1307
1308
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1309
0
    if (t->op == GGML_OP_UNARY) {
1310
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1311
0
        return ggml_unary_op_name(uop);
1312
0
    }
1313
0
    if (t->op == GGML_OP_GLU) {
1314
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1315
0
        return ggml_glu_op_name(gop);
1316
0
    }
1317
0
    return ggml_op_name(t->op);
1318
0
}
1319
1320
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1321
0
    return ggml_type_size(tensor->type);
1322
0
}
1323
1324
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1325
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1326
1327
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1328
0
}
1329
1330
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1331
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1332
1333
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1334
0
}
1335
1336
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1337
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1338
1339
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1340
0
}
1341
1342
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1343
0
    return tensor->ne[3] == 1;
1344
0
}
1345
1346
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1347
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1348
0
        if (tensor->ne[i] > 1) {
1349
0
            return i + 1;
1350
0
        }
1351
0
    }
1352
0
    return 1;
1353
0
}
1354
1355
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1356
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1357
1358
0
    switch (ftype) {
1359
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1360
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1361
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1362
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1363
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1364
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1365
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1366
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1367
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1368
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1369
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1370
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1371
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1372
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1373
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1374
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1375
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1376
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1377
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1378
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1379
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1380
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1381
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1382
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1383
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1384
0
    }
1385
1386
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1387
1388
0
    return wtype;
1389
0
}
1390
1391
316
size_t ggml_tensor_overhead(void) {
1392
316
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1393
316
}
1394
1395
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1396
0
    return tensor->nb[0] > tensor->nb[1];
1397
0
}
1398
1399
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1400
0
    size_t next_nb = ggml_type_size(tensor->type);
1401
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1402
0
        return false;
1403
0
    }
1404
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1405
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1406
0
        if (tensor->ne[i] != 1) {
1407
0
            if (i > n) {
1408
0
                if (tensor->nb[i] != next_nb) {
1409
0
                    return false;
1410
0
                }
1411
0
                next_nb *= tensor->ne[i];
1412
0
            } else {
1413
                // this dimension does not need to be contiguous
1414
0
                next_nb = tensor->ne[i]*tensor->nb[i];
1415
0
            }
1416
0
        }
1417
0
    }
1418
0
    return true;
1419
0
}
1420
1421
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1422
0
    return ggml_is_contiguous_0(tensor);
1423
0
}
1424
1425
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1426
0
    return ggml_is_contiguous_n(tensor, 0);
1427
0
}
1428
1429
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1430
0
    return ggml_is_contiguous_n(tensor, 1);
1431
0
}
1432
1433
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1434
0
    return ggml_is_contiguous_n(tensor, 2);
1435
0
}
1436
1437
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1438
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1439
0
}
1440
1441
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1442
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1443
1444
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1445
0
}
1446
1447
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1448
0
    return
1449
0
        tensor->nb[0] > tensor->nb[2] &&
1450
0
        tensor->nb[1] > tensor->nb[0] &&
1451
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1452
0
}
1453
1454
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1455
0
    return
1456
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1457
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1458
0
}
1459
1460
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1461
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1462
1463
0
    return
1464
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1465
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1466
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1467
0
}
1468
1469
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1470
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1471
0
        if (tensor->ne[i] == 0) {
1472
            // empty if any dimension has no elements
1473
0
            return true;
1474
0
        }
1475
0
    }
1476
0
    return false;
1477
0
}
1478
1479
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1480
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1481
1482
0
    return
1483
0
        (t0->ne[0] == t1->ne[0]) &&
1484
0
        (t0->ne[1] == t1->ne[1]) &&
1485
0
        (t0->ne[2] == t1->ne[2]) &&
1486
0
        (t0->ne[3] == t1->ne[3]);
1487
0
}
1488
1489
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1490
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1491
1492
0
    return
1493
0
        (t0->nb[0] == t1->nb[0]) &&
1494
0
        (t0->nb[1] == t1->nb[1]) &&
1495
0
        (t0->nb[2] == t1->nb[2]) &&
1496
0
        (t0->nb[3] == t1->nb[3]);
1497
0
}
1498
1499
// check if t1 can be represented as a repetition of t0
1500
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1501
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1502
1503
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1504
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1505
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1506
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1507
0
        (t1->ne[3]%t0->ne[3] == 0);
1508
0
}
1509
1510
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1511
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1512
1513
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1514
0
}
1515
1516
// assert that pointer is aligned to GGML_MEM_ALIGN
1517
#define GGML_ASSERT_ALIGNED(ptr) \
1518
2.33k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1519
1520
////////////////////////////////////////////////////////////////////////////////
1521
1522
1.36k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1523
1.36k
    bool is_first_call = true;
1524
1525
1.36k
    ggml_critical_section_start();
1526
1527
1.36k
    if (is_first_call) {
1528
        // initialize time system (required on Windows)
1529
1.36k
        ggml_time_init();
1530
1531
1.36k
        is_first_call = false;
1532
1.36k
    }
1533
1534
1.36k
    ggml_critical_section_end();
1535
1536
1.36k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1537
1538
    // allow to call ggml_init with 0 size
1539
1.36k
    if (params.mem_size == 0) {
1540
1.21k
        params.mem_size = GGML_MEM_ALIGN;
1541
1.21k
    }
1542
1543
1.36k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1544
1545
1.36k
    *ctx = (struct ggml_context) {
1546
1.36k
        /*.mem_size           =*/ mem_size,
1547
1.36k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1548
1.36k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1549
1.36k
        /*.no_alloc           =*/ params.no_alloc,
1550
1.36k
        /*.n_objects          =*/ 0,
1551
1.36k
        /*.objects_begin      =*/ NULL,
1552
1.36k
        /*.objects_end        =*/ NULL,
1553
1.36k
    };
1554
1555
1.36k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1556
1557
1.36k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1558
1559
1.36k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1560
1561
1.36k
    return ctx;
1562
1.36k
}
1563
1564
0
void ggml_reset(struct ggml_context * ctx) {
1565
0
    if (ctx == NULL) {
1566
0
        return;
1567
0
    }
1568
1569
0
    ctx->n_objects     = 0;
1570
0
    ctx->objects_begin = NULL;
1571
0
    ctx->objects_end   = NULL;
1572
0
}
1573
1574
1.36k
void ggml_free(struct ggml_context * ctx) {
1575
1.36k
    if (ctx == NULL) {
1576
0
        return;
1577
0
    }
1578
1579
1.36k
    if (ctx->mem_buffer_owned) {
1580
1.36k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1581
1.36k
    }
1582
1583
1.36k
    GGML_FREE(ctx);
1584
1.36k
}
1585
1586
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1587
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1588
0
}
1589
1590
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1591
0
    return ctx->no_alloc;
1592
0
}
1593
1594
632
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1595
632
    ctx->no_alloc = no_alloc;
1596
632
}
1597
1598
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1599
0
    return ctx->mem_buffer;
1600
0
}
1601
1602
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1603
0
    return ctx->mem_size;
1604
0
}
1605
1606
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1607
0
    size_t max_size = 0;
1608
1609
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1610
0
        size_t bytes = ggml_nbytes(tensor);
1611
0
        max_size = MAX(max_size, bytes);
1612
0
    }
1613
1614
0
    return max_size;
1615
0
}
1616
1617
////////////////////////////////////////////////////////////////////////////////
1618
1619
967
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1620
    // always insert objects at the end of the context's memory pool
1621
967
    struct ggml_object * obj_cur = ctx->objects_end;
1622
1623
967
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1624
967
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1625
967
    const size_t cur_end  = cur_offs + cur_size;
1626
1627
    // align to GGML_MEM_ALIGN
1628
967
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1629
1630
967
    char * const mem_buffer = ctx->mem_buffer;
1631
967
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1632
1633
967
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1634
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1635
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1636
#ifndef NDEBUG
1637
        GGML_ABORT("not enough space in the context's memory pool");
1638
#endif
1639
0
        return NULL;
1640
0
    }
1641
1642
967
    *obj_new = (struct ggml_object) {
1643
967
        .offs = cur_end + GGML_OBJECT_SIZE,
1644
967
        .size = size_needed,
1645
967
        .next = NULL,
1646
967
        .type = type,
1647
967
    };
1648
1649
967
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1650
1651
967
    if (obj_cur != NULL) {
1652
810
        obj_cur->next = obj_new;
1653
810
    } else {
1654
        // this is the first object in this context
1655
157
        ctx->objects_begin = obj_new;
1656
157
    }
1657
1658
967
    ctx->objects_end = obj_new;
1659
1660
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1661
1662
967
    return obj_new;
1663
967
}
1664
1665
static struct ggml_tensor * ggml_new_tensor_impl(
1666
        struct ggml_context * ctx,
1667
        enum   ggml_type      type,
1668
        int                   n_dims,
1669
        const int64_t       * ne,
1670
        struct ggml_tensor  * view_src,
1671
967
        size_t                view_offs) {
1672
1673
967
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1674
967
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1675
1676
    // find the base tensor and absolute offset
1677
967
    if (view_src != NULL && view_src->view_src != NULL) {
1678
0
        view_offs += view_src->view_offs;
1679
0
        view_src   = view_src->view_src;
1680
0
    }
1681
1682
967
    size_t data_size = ggml_row_size(type, ne[0]);
1683
3.86k
    for (int i = 1; i < n_dims; i++) {
1684
2.90k
        data_size *= ne[i];
1685
2.90k
    }
1686
1687
967
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1688
1689
967
    void * data = view_src != NULL ? view_src->data : NULL;
1690
967
    if (data != NULL) {
1691
0
        data = (char *) data + view_offs;
1692
0
    }
1693
1694
967
    size_t obj_alloc_size = 0;
1695
1696
967
    if (view_src == NULL && !ctx->no_alloc) {
1697
        // allocate tensor data in the context's memory pool
1698
0
        obj_alloc_size = data_size;
1699
0
    }
1700
1701
967
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1702
967
    GGML_ASSERT(obj_new);
1703
1704
967
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1705
1706
967
    *result = (struct ggml_tensor) {
1707
967
        /*.type         =*/ type,
1708
967
        /*.buffer       =*/ NULL,
1709
967
        /*.ne           =*/ { 1, 1, 1, 1 },
1710
967
        /*.nb           =*/ { 0, 0, 0, 0 },
1711
967
        /*.op           =*/ GGML_OP_NONE,
1712
967
        /*.op_params    =*/ { 0 },
1713
967
        /*.flags        =*/ 0,
1714
967
        /*.src          =*/ { NULL },
1715
967
        /*.view_src     =*/ view_src,
1716
967
        /*.view_offs    =*/ view_offs,
1717
967
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1718
967
        /*.name         =*/ { 0 },
1719
967
        /*.extra        =*/ NULL,
1720
967
        /*.padding      =*/ { 0 },
1721
967
    };
1722
1723
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1724
    //GGML_ASSERT_ALIGNED(result->data);
1725
1726
4.83k
    for (int i = 0; i < n_dims; i++) {
1727
3.86k
        result->ne[i] = ne[i];
1728
3.86k
    }
1729
1730
967
    result->nb[0] = ggml_type_size(type);
1731
967
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1732
2.90k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1733
1.93k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1734
1.93k
    }
1735
1736
967
    ctx->n_objects++;
1737
1738
967
    return result;
1739
967
}
1740
1741
struct ggml_tensor * ggml_new_tensor(
1742
        struct ggml_context * ctx,
1743
        enum   ggml_type      type,
1744
        int                   n_dims,
1745
967
        const int64_t       * ne) {
1746
967
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1747
967
}
1748
1749
struct ggml_tensor * ggml_new_tensor_1d(
1750
        struct ggml_context * ctx,
1751
        enum   ggml_type      type,
1752
0
        int64_t ne0) {
1753
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1754
0
}
1755
1756
struct ggml_tensor * ggml_new_tensor_2d(
1757
        struct ggml_context * ctx,
1758
        enum   ggml_type      type,
1759
        int64_t ne0,
1760
0
        int64_t ne1) {
1761
0
    const int64_t ne[2] = { ne0, ne1 };
1762
0
    return ggml_new_tensor(ctx, type, 2, ne);
1763
0
}
1764
1765
struct ggml_tensor * ggml_new_tensor_3d(
1766
        struct ggml_context * ctx,
1767
        enum   ggml_type      type,
1768
        int64_t ne0,
1769
        int64_t ne1,
1770
0
        int64_t ne2) {
1771
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1772
0
    return ggml_new_tensor(ctx, type, 3, ne);
1773
0
}
1774
1775
struct ggml_tensor * ggml_new_tensor_4d(
1776
        struct ggml_context * ctx,
1777
        enum   ggml_type type,
1778
        int64_t ne0,
1779
        int64_t ne1,
1780
        int64_t ne2,
1781
0
        int64_t ne3) {
1782
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1783
0
    return ggml_new_tensor(ctx, type, 4, ne);
1784
0
}
1785
1786
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1787
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1788
1789
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1790
0
}
1791
1792
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1793
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1794
0
}
1795
1796
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1797
0
    const int64_t ne2 = tensor->ne[2];
1798
0
    const int64_t ne1 = tensor->ne[1];
1799
0
    const int64_t ne0 = tensor->ne[0];
1800
1801
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1802
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1803
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1804
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1805
1806
0
    if (i0) {
1807
0
        * i0 = i0_;
1808
0
    }
1809
0
    if (i1) {
1810
0
        * i1 = i1_;
1811
0
    }
1812
0
    if (i2) {
1813
0
        * i2 = i2_;
1814
0
    }
1815
0
    if (i3) {
1816
0
        * i3 = i3_;
1817
0
    }
1818
0
}
1819
1820
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1821
0
    return tensor->data;
1822
0
}
1823
1824
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1825
0
    assert(tensor->type == GGML_TYPE_F32);
1826
0
    return (float *)(tensor->data);
1827
0
}
1828
1829
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1830
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1831
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1832
0
}
1833
1834
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1835
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1836
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1837
0
}
1838
1839
870
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1840
870
    return tensor->name;
1841
870
}
1842
1843
2.27k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1844
2.27k
    size_t i;
1845
18.0k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1846
15.7k
        tensor->name[i] = name[i];
1847
15.7k
    }
1848
2.27k
    tensor->name[i] = '\0';
1849
2.27k
    return tensor;
1850
2.27k
}
1851
1852
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1853
0
    va_list args;
1854
0
    va_start(args, fmt);
1855
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1856
0
    va_end(args);
1857
0
    return tensor;
1858
0
}
1859
1860
struct ggml_tensor * ggml_view_tensor(
1861
        struct ggml_context * ctx,
1862
0
        struct ggml_tensor  * src) {
1863
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1864
0
    ggml_format_name(result, "%s (view)", src->name);
1865
1866
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1867
0
        result->nb[i] = src->nb[i];
1868
0
    }
1869
1870
0
    return result;
1871
0
}
1872
1873
315
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1874
315
    struct ggml_object * obj = ctx->objects_begin;
1875
1876
315
    char * const mem_buffer = ctx->mem_buffer;
1877
1878
315
    while (obj != NULL) {
1879
157
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1880
157
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1881
157
        }
1882
1883
0
        obj = obj->next;
1884
0
    }
1885
1886
158
    return NULL;
1887
315
}
1888
1889
684
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1890
684
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1891
684
    obj = obj->next;
1892
1893
684
    char * const mem_buffer = ctx->mem_buffer;
1894
1895
684
    while (obj != NULL) {
1896
620
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1897
620
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1898
620
        }
1899
1900
0
        obj = obj->next;
1901
0
    }
1902
1903
64
    return NULL;
1904
684
}
1905
1906
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1907
0
    struct ggml_object * obj = ctx->objects_begin;
1908
1909
0
    char * const mem_buffer = ctx->mem_buffer;
1910
1911
0
    while (obj != NULL) {
1912
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1913
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1914
0
            if (strcmp(cur->name, name) == 0) {
1915
0
                return cur;
1916
0
            }
1917
0
        }
1918
1919
0
        obj = obj->next;
1920
0
    }
1921
1922
0
    return NULL;
1923
0
}
1924
1925
////////////////////////////////////////////////////////////////////////////////
1926
1927
// ggml_dup
1928
1929
static struct ggml_tensor * ggml_dup_impl(
1930
        struct ggml_context * ctx,
1931
        struct ggml_tensor  * a,
1932
0
        bool                  inplace) {
1933
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1934
1935
0
    result->op     = GGML_OP_DUP;
1936
0
    result->src[0] = a;
1937
1938
0
    return result;
1939
0
}
1940
1941
struct ggml_tensor * ggml_dup(
1942
        struct ggml_context * ctx,
1943
0
        struct ggml_tensor  * a) {
1944
0
    return ggml_dup_impl(ctx, a, false);
1945
0
}
1946
1947
struct ggml_tensor * ggml_dup_inplace(
1948
        struct ggml_context * ctx,
1949
0
        struct ggml_tensor  * a) {
1950
0
    return ggml_dup_impl(ctx, a, true);
1951
0
}
1952
1953
// ggml_add
1954
1955
static struct ggml_tensor * ggml_add_impl(
1956
        struct ggml_context * ctx,
1957
        struct ggml_tensor  * a,
1958
        struct ggml_tensor  * b,
1959
0
        bool                  inplace) {
1960
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1961
1962
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1963
1964
0
    result->op     = GGML_OP_ADD;
1965
0
    result->src[0] = a;
1966
0
    result->src[1] = b;
1967
1968
0
    return result;
1969
0
}
1970
1971
struct ggml_tensor * ggml_add(
1972
        struct ggml_context * ctx,
1973
        struct ggml_tensor  * a,
1974
0
        struct ggml_tensor  * b) {
1975
0
    return ggml_add_impl(ctx, a, b, false);
1976
0
}
1977
1978
struct ggml_tensor * ggml_add_inplace(
1979
        struct ggml_context * ctx,
1980
        struct ggml_tensor  * a,
1981
0
        struct ggml_tensor  * b) {
1982
0
    return ggml_add_impl(ctx, a, b, true);
1983
0
}
1984
1985
// ggml_add_cast
1986
1987
static struct ggml_tensor * ggml_add_cast_impl(
1988
        struct ggml_context * ctx,
1989
        struct ggml_tensor  * a,
1990
        struct ggml_tensor  * b,
1991
0
        enum   ggml_type      type) {
1992
    // TODO: support less-strict constraint
1993
    //       GGML_ASSERT(ggml_can_repeat(b, a));
1994
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
1995
1996
    // currently only supported for quantized input and f16
1997
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
1998
0
                a->type == GGML_TYPE_F16 ||
1999
0
                a->type == GGML_TYPE_BF16);
2000
2001
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2002
2003
0
    result->op     = GGML_OP_ADD;
2004
0
    result->src[0] = a;
2005
0
    result->src[1] = b;
2006
2007
0
    return result;
2008
0
}
2009
2010
struct ggml_tensor * ggml_add_cast(
2011
        struct ggml_context * ctx,
2012
        struct ggml_tensor  * a,
2013
        struct ggml_tensor  * b,
2014
0
        enum   ggml_type      type) {
2015
0
    return ggml_add_cast_impl(ctx, a, b, type);
2016
0
}
2017
2018
struct ggml_tensor * ggml_add_id(
2019
            struct ggml_context * ctx,
2020
            struct ggml_tensor  * a,
2021
            struct ggml_tensor  * b,
2022
0
            struct ggml_tensor  * ids) {
2023
2024
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2025
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2026
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2027
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2028
2029
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2030
2031
0
    result->op     = GGML_OP_ADD_ID;
2032
0
    result->src[0] = a;
2033
0
    result->src[1] = b;
2034
0
    result->src[2] = ids;
2035
2036
0
    return result;
2037
0
}
2038
2039
// ggml_add1
2040
2041
static struct ggml_tensor * ggml_add1_impl(
2042
        struct ggml_context * ctx,
2043
        struct ggml_tensor  * a,
2044
        struct ggml_tensor  * b,
2045
0
        bool                  inplace) {
2046
0
    GGML_ASSERT(ggml_is_scalar(b));
2047
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2048
2049
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2050
2051
0
    result->op     = GGML_OP_ADD1;
2052
0
    result->src[0] = a;
2053
0
    result->src[1] = b;
2054
2055
0
    return result;
2056
0
}
2057
2058
struct ggml_tensor * ggml_add1(
2059
        struct ggml_context * ctx,
2060
        struct ggml_tensor  * a,
2061
0
        struct ggml_tensor  * b) {
2062
0
    return ggml_add1_impl(ctx, a, b, false);
2063
0
}
2064
2065
struct ggml_tensor * ggml_add1_inplace(
2066
        struct ggml_context * ctx,
2067
        struct ggml_tensor  * a,
2068
0
        struct ggml_tensor  * b) {
2069
0
    return ggml_add1_impl(ctx, a, b, true);
2070
0
}
2071
2072
// ggml_acc
2073
2074
static struct ggml_tensor * ggml_acc_impl(
2075
        struct ggml_context * ctx,
2076
        struct ggml_tensor  * a,
2077
        struct ggml_tensor  * b,
2078
        size_t                nb1,
2079
        size_t                nb2,
2080
        size_t                nb3,
2081
        size_t                offset,
2082
0
        bool                  inplace) {
2083
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2084
0
    GGML_ASSERT(ggml_is_contiguous(a));
2085
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2086
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2087
2088
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2089
2090
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2091
0
    ggml_set_op_params(result, params, sizeof(params));
2092
2093
0
    result->op     = GGML_OP_ACC;
2094
0
    result->src[0] = a;
2095
0
    result->src[1] = b;
2096
2097
0
    return result;
2098
0
}
2099
2100
struct ggml_tensor * ggml_acc(
2101
        struct ggml_context * ctx,
2102
        struct ggml_tensor  * a,
2103
        struct ggml_tensor  * b,
2104
        size_t                nb1,
2105
        size_t                nb2,
2106
        size_t                nb3,
2107
0
        size_t                offset) {
2108
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2109
0
}
2110
2111
struct ggml_tensor * ggml_acc_inplace(
2112
        struct ggml_context * ctx,
2113
        struct ggml_tensor  * a,
2114
        struct ggml_tensor  * b,
2115
        size_t                nb1,
2116
        size_t                nb2,
2117
        size_t                nb3,
2118
0
        size_t                offset) {
2119
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2120
0
}
2121
2122
// ggml_sub
2123
2124
static struct ggml_tensor * ggml_sub_impl(
2125
        struct ggml_context * ctx,
2126
        struct ggml_tensor  * a,
2127
        struct ggml_tensor  * b,
2128
0
        bool                  inplace) {
2129
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2130
2131
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2132
2133
0
    result->op     = GGML_OP_SUB;
2134
0
    result->src[0] = a;
2135
0
    result->src[1] = b;
2136
2137
0
    return result;
2138
0
}
2139
2140
struct ggml_tensor * ggml_sub(
2141
        struct ggml_context * ctx,
2142
        struct ggml_tensor  * a,
2143
0
        struct ggml_tensor  * b) {
2144
0
    return ggml_sub_impl(ctx, a, b, false);
2145
0
}
2146
2147
struct ggml_tensor * ggml_sub_inplace(
2148
        struct ggml_context * ctx,
2149
        struct ggml_tensor  * a,
2150
0
        struct ggml_tensor  * b) {
2151
0
    return ggml_sub_impl(ctx, a, b, true);
2152
0
}
2153
2154
// ggml_mul
2155
2156
static struct ggml_tensor * ggml_mul_impl(
2157
        struct ggml_context * ctx,
2158
        struct ggml_tensor  * a,
2159
        struct ggml_tensor  * b,
2160
0
        bool                  inplace) {
2161
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2162
2163
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2164
2165
0
    result->op     = GGML_OP_MUL;
2166
0
    result->src[0] = a;
2167
0
    result->src[1] = b;
2168
2169
0
    return result;
2170
0
}
2171
2172
struct ggml_tensor * ggml_mul(
2173
        struct ggml_context * ctx,
2174
        struct ggml_tensor  * a,
2175
0
        struct ggml_tensor  * b) {
2176
0
    return ggml_mul_impl(ctx, a, b, false);
2177
0
}
2178
2179
struct ggml_tensor * ggml_mul_inplace(
2180
        struct ggml_context * ctx,
2181
        struct ggml_tensor  * a,
2182
0
        struct ggml_tensor  * b) {
2183
0
    return ggml_mul_impl(ctx, a, b, true);
2184
0
}
2185
2186
// ggml_div
2187
2188
static struct ggml_tensor * ggml_div_impl(
2189
        struct ggml_context * ctx,
2190
        struct ggml_tensor  * a,
2191
        struct ggml_tensor  * b,
2192
0
        bool                  inplace) {
2193
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2194
2195
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2196
2197
0
    result->op     = GGML_OP_DIV;
2198
0
    result->src[0] = a;
2199
0
    result->src[1] = b;
2200
2201
0
    return result;
2202
0
}
2203
2204
struct ggml_tensor * ggml_div(
2205
        struct ggml_context * ctx,
2206
        struct ggml_tensor  * a,
2207
0
        struct ggml_tensor  * b) {
2208
0
    return ggml_div_impl(ctx, a, b, false);
2209
0
}
2210
2211
struct ggml_tensor * ggml_div_inplace(
2212
        struct ggml_context * ctx,
2213
        struct ggml_tensor  * a,
2214
0
        struct ggml_tensor  * b) {
2215
0
    return ggml_div_impl(ctx, a, b, true);
2216
0
}
2217
2218
// ggml_sqr
2219
2220
static struct ggml_tensor * ggml_sqr_impl(
2221
        struct ggml_context * ctx,
2222
        struct ggml_tensor  * a,
2223
0
        bool                  inplace) {
2224
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2225
2226
0
    result->op     = GGML_OP_SQR;
2227
0
    result->src[0] = a;
2228
2229
0
    return result;
2230
0
}
2231
2232
struct ggml_tensor * ggml_sqr(
2233
        struct ggml_context * ctx,
2234
0
        struct ggml_tensor  * a) {
2235
0
    return ggml_sqr_impl(ctx, a, false);
2236
0
}
2237
2238
struct ggml_tensor * ggml_sqr_inplace(
2239
        struct ggml_context * ctx,
2240
0
        struct ggml_tensor  * a) {
2241
0
    return ggml_sqr_impl(ctx, a, true);
2242
0
}
2243
2244
// ggml_sqrt
2245
2246
static struct ggml_tensor * ggml_sqrt_impl(
2247
        struct ggml_context * ctx,
2248
        struct ggml_tensor  * a,
2249
0
        bool                  inplace) {
2250
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2251
2252
0
    result->op     = GGML_OP_SQRT;
2253
0
    result->src[0] = a;
2254
2255
0
    return result;
2256
0
}
2257
2258
struct ggml_tensor * ggml_sqrt(
2259
        struct ggml_context * ctx,
2260
0
        struct ggml_tensor  * a) {
2261
0
    return ggml_sqrt_impl(ctx, a, false);
2262
0
}
2263
2264
struct ggml_tensor * ggml_sqrt_inplace(
2265
        struct ggml_context * ctx,
2266
0
        struct ggml_tensor  * a) {
2267
0
    return ggml_sqrt_impl(ctx, a, true);
2268
0
}
2269
2270
// ggml_log
2271
2272
static struct ggml_tensor * ggml_log_impl(
2273
        struct ggml_context * ctx,
2274
        struct ggml_tensor  * a,
2275
0
        bool                  inplace) {
2276
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2277
2278
0
    result->op     = GGML_OP_LOG;
2279
0
    result->src[0] = a;
2280
2281
0
    return result;
2282
0
}
2283
2284
struct ggml_tensor * ggml_log(
2285
        struct ggml_context * ctx,
2286
0
        struct ggml_tensor  * a) {
2287
0
    return ggml_log_impl(ctx, a, false);
2288
0
}
2289
2290
struct ggml_tensor * ggml_log_inplace(
2291
        struct ggml_context * ctx,
2292
0
        struct ggml_tensor  * a) {
2293
0
    return ggml_log_impl(ctx, a, true);
2294
0
}
2295
2296
struct ggml_tensor * ggml_expm1(
2297
        struct ggml_context * ctx,
2298
0
        struct ggml_tensor  * a) {
2299
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2300
0
}
2301
2302
struct ggml_tensor * ggml_expm1_inplace(
2303
        struct ggml_context * ctx,
2304
0
        struct ggml_tensor  * a) {
2305
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2306
0
}
2307
2308
struct ggml_tensor * ggml_softplus(
2309
        struct ggml_context * ctx,
2310
0
        struct ggml_tensor  * a) {
2311
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2312
0
}
2313
2314
struct ggml_tensor * ggml_softplus_inplace(
2315
        struct ggml_context * ctx,
2316
0
        struct ggml_tensor  * a) {
2317
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2318
0
}
2319
2320
// ggml_sin
2321
2322
static struct ggml_tensor * ggml_sin_impl(
2323
        struct ggml_context * ctx,
2324
        struct ggml_tensor  * a,
2325
0
        bool                  inplace) {
2326
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2327
2328
0
    result->op     = GGML_OP_SIN;
2329
0
    result->src[0] = a;
2330
2331
0
    return result;
2332
0
}
2333
2334
struct ggml_tensor * ggml_sin(
2335
        struct ggml_context * ctx,
2336
0
        struct ggml_tensor  * a) {
2337
0
    return ggml_sin_impl(ctx, a, false);
2338
0
}
2339
2340
struct ggml_tensor * ggml_sin_inplace(
2341
        struct ggml_context * ctx,
2342
0
        struct ggml_tensor  * a) {
2343
0
    return ggml_sin_impl(ctx, a, true);
2344
0
}
2345
2346
// ggml_cos
2347
2348
static struct ggml_tensor * ggml_cos_impl(
2349
        struct ggml_context * ctx,
2350
        struct ggml_tensor  * a,
2351
0
        bool                  inplace) {
2352
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2353
2354
0
    result->op     = GGML_OP_COS;
2355
0
    result->src[0] = a;
2356
2357
0
    return result;
2358
0
}
2359
2360
struct ggml_tensor * ggml_cos(
2361
        struct ggml_context * ctx,
2362
0
        struct ggml_tensor  * a) {
2363
0
    return ggml_cos_impl(ctx, a, false);
2364
0
}
2365
2366
struct ggml_tensor * ggml_cos_inplace(
2367
        struct ggml_context * ctx,
2368
0
        struct ggml_tensor  * a) {
2369
0
    return ggml_cos_impl(ctx, a, true);
2370
0
}
2371
2372
// ggml_sum
2373
2374
struct ggml_tensor * ggml_sum(
2375
        struct ggml_context * ctx,
2376
0
        struct ggml_tensor  * a) {
2377
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2378
2379
0
    result->op     = GGML_OP_SUM;
2380
0
    result->src[0] = a;
2381
2382
0
    return result;
2383
0
}
2384
2385
// ggml_sum_rows
2386
2387
struct ggml_tensor * ggml_sum_rows(
2388
        struct ggml_context * ctx,
2389
0
        struct ggml_tensor  * a) {
2390
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2391
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2392
0
        ne[i] = a->ne[i];
2393
0
    }
2394
2395
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2396
2397
0
    result->op     = GGML_OP_SUM_ROWS;
2398
0
    result->src[0] = a;
2399
2400
0
    return result;
2401
0
}
2402
2403
// ggml_cumsum
2404
2405
struct ggml_tensor * ggml_cumsum(
2406
        struct ggml_context * ctx,
2407
0
        struct ggml_tensor  * a) {
2408
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2409
2410
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2411
2412
0
    result->op     = GGML_OP_CUMSUM;
2413
0
    result->src[0] = a;
2414
2415
0
    return result;
2416
0
}
2417
2418
// ggml_mean
2419
2420
struct ggml_tensor * ggml_mean(
2421
        struct ggml_context * ctx,
2422
0
        struct ggml_tensor  * a) {
2423
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2424
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2425
2426
0
    result->op     = GGML_OP_MEAN;
2427
0
    result->src[0] = a;
2428
2429
0
    return result;
2430
0
}
2431
2432
// ggml_argmax
2433
2434
struct ggml_tensor * ggml_argmax(
2435
        struct ggml_context * ctx,
2436
0
        struct ggml_tensor  * a) {
2437
0
    GGML_ASSERT(ggml_is_matrix(a));
2438
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2439
2440
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2441
2442
0
    result->op     = GGML_OP_ARGMAX;
2443
0
    result->src[0] = a;
2444
2445
0
    return result;
2446
0
}
2447
2448
// ggml_count_equal
2449
2450
struct ggml_tensor * ggml_count_equal(
2451
        struct ggml_context * ctx,
2452
        struct ggml_tensor  * a,
2453
0
        struct ggml_tensor  * b) {
2454
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2455
2456
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2457
2458
0
    result->op     = GGML_OP_COUNT_EQUAL;
2459
0
    result->src[0] = a;
2460
0
    result->src[1] = b;
2461
2462
0
    return result;
2463
0
}
2464
2465
// ggml_repeat
2466
2467
struct ggml_tensor * ggml_repeat(
2468
        struct ggml_context * ctx,
2469
        struct ggml_tensor  * a,
2470
0
        struct ggml_tensor  * b) {
2471
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2472
2473
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2474
2475
0
    result->op     = GGML_OP_REPEAT;
2476
0
    result->src[0] = a;
2477
2478
0
    return result;
2479
0
}
2480
2481
struct ggml_tensor * ggml_repeat_4d(
2482
        struct ggml_context * ctx,
2483
        struct ggml_tensor * a,
2484
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2485
0
    const bool can_repeat = ggml_is_empty(a) || (
2486
0
        (ne0 % a->ne[0] == 0) &&
2487
0
        (ne1 % a->ne[1] == 0) &&
2488
0
        (ne2 % a->ne[2] == 0) &&
2489
0
        (ne3 % a->ne[3] == 0)
2490
0
    );
2491
0
    GGML_ASSERT(can_repeat);
2492
2493
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2494
2495
0
    result->op     = GGML_OP_REPEAT;
2496
0
    result->src[0] = a;
2497
2498
0
    return result;
2499
0
}
2500
2501
// ggml_repeat_back
2502
2503
struct ggml_tensor * ggml_repeat_back(
2504
        struct ggml_context * ctx,
2505
        struct ggml_tensor  * a,
2506
0
        struct ggml_tensor  * b) {
2507
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2508
2509
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2510
2511
0
    result->op     = GGML_OP_REPEAT_BACK;
2512
0
    result->src[0] = a;
2513
2514
0
    return result;
2515
0
}
2516
2517
// ggml_concat
2518
2519
struct ggml_tensor * ggml_concat(
2520
    struct ggml_context * ctx,
2521
    struct ggml_tensor  * a,
2522
    struct ggml_tensor  * b,
2523
0
    int                   dim) {
2524
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2525
0
    GGML_ASSERT(a->type == b->type);
2526
2527
0
    int64_t ne[GGML_MAX_DIMS];
2528
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2529
0
        if (d == dim) {
2530
0
            ne[d] = a->ne[d] + b->ne[d];
2531
0
            continue;
2532
0
        }
2533
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2534
0
        ne[d] = a->ne[d];
2535
0
    }
2536
2537
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2538
2539
0
    ggml_set_op_params_i32(result, 0, dim);
2540
2541
0
    result->op     = GGML_OP_CONCAT;
2542
0
    result->src[0] = a;
2543
0
    result->src[1] = b;
2544
2545
0
    return result;
2546
0
}
2547
2548
// ggml_abs
2549
2550
struct ggml_tensor * ggml_abs(
2551
        struct ggml_context * ctx,
2552
0
        struct ggml_tensor  * a) {
2553
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2554
0
}
2555
2556
struct ggml_tensor * ggml_abs_inplace(
2557
        struct ggml_context * ctx,
2558
0
        struct ggml_tensor  * a) {
2559
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2560
0
}
2561
2562
// ggml_sgn
2563
2564
struct ggml_tensor * ggml_sgn(
2565
        struct ggml_context * ctx,
2566
0
        struct ggml_tensor  * a) {
2567
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2568
0
}
2569
2570
struct ggml_tensor * ggml_sgn_inplace(
2571
        struct ggml_context * ctx,
2572
0
        struct ggml_tensor  * a) {
2573
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2574
0
}
2575
2576
// ggml_neg
2577
2578
struct ggml_tensor * ggml_neg(
2579
        struct ggml_context * ctx,
2580
0
        struct ggml_tensor  * a) {
2581
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2582
0
}
2583
2584
struct ggml_tensor * ggml_neg_inplace(
2585
        struct ggml_context * ctx,
2586
0
        struct ggml_tensor  * a) {
2587
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2588
0
}
2589
2590
// ggml_step
2591
2592
struct ggml_tensor * ggml_step(
2593
        struct ggml_context * ctx,
2594
0
        struct ggml_tensor  * a) {
2595
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2596
0
}
2597
2598
struct ggml_tensor * ggml_step_inplace(
2599
        struct ggml_context * ctx,
2600
0
        struct ggml_tensor  * a) {
2601
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2602
0
}
2603
2604
// ggml_tanh
2605
2606
struct ggml_tensor * ggml_tanh(
2607
        struct ggml_context * ctx,
2608
0
        struct ggml_tensor  * a) {
2609
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2610
0
}
2611
2612
struct ggml_tensor * ggml_tanh_inplace(
2613
        struct ggml_context * ctx,
2614
0
        struct ggml_tensor  * a) {
2615
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2616
0
}
2617
2618
// ggml_elu
2619
2620
struct ggml_tensor * ggml_elu(
2621
    struct ggml_context * ctx,
2622
0
    struct ggml_tensor  * a) {
2623
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2624
0
}
2625
2626
struct ggml_tensor * ggml_elu_inplace(
2627
    struct ggml_context * ctx,
2628
0
    struct ggml_tensor  * a) {
2629
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2630
0
}
2631
2632
// ggml_relu
2633
2634
struct ggml_tensor * ggml_relu(
2635
        struct ggml_context * ctx,
2636
0
        struct ggml_tensor  * a) {
2637
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2638
0
}
2639
2640
struct ggml_tensor * ggml_relu_inplace(
2641
        struct ggml_context * ctx,
2642
0
        struct ggml_tensor  * a) {
2643
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2644
0
}
2645
2646
// ggml_leaky_relu
2647
2648
struct ggml_tensor * ggml_leaky_relu(
2649
        struct ggml_context * ctx,
2650
        struct ggml_tensor  * a,
2651
        float                 negative_slope,
2652
0
        bool                  inplace) {
2653
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2654
2655
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2656
2657
0
    result->op     = GGML_OP_LEAKY_RELU;
2658
0
    result->src[0] = a;
2659
2660
0
    return result;
2661
0
}
2662
2663
// ggml_sigmoid
2664
2665
struct ggml_tensor * ggml_sigmoid(
2666
        struct ggml_context * ctx,
2667
0
        struct ggml_tensor  * a) {
2668
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2669
0
}
2670
2671
struct ggml_tensor * ggml_sigmoid_inplace(
2672
        struct ggml_context * ctx,
2673
0
        struct ggml_tensor  * a) {
2674
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2675
0
}
2676
2677
// ggml_gelu
2678
2679
struct ggml_tensor * ggml_gelu(
2680
        struct ggml_context * ctx,
2681
0
        struct ggml_tensor  * a) {
2682
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2683
0
}
2684
2685
struct ggml_tensor * ggml_gelu_inplace(
2686
        struct ggml_context * ctx,
2687
0
        struct ggml_tensor  * a) {
2688
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2689
0
}
2690
2691
// ggml_gelu_erf
2692
2693
struct ggml_tensor * ggml_gelu_erf(
2694
        struct ggml_context * ctx,
2695
0
        struct ggml_tensor  * a) {
2696
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2697
0
}
2698
2699
struct ggml_tensor * ggml_gelu_erf_inplace(
2700
        struct ggml_context * ctx,
2701
0
        struct ggml_tensor  * a) {
2702
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2703
0
}
2704
2705
// ggml_gelu_quick
2706
2707
struct ggml_tensor * ggml_gelu_quick(
2708
        struct ggml_context * ctx,
2709
0
        struct ggml_tensor  * a) {
2710
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2711
0
}
2712
2713
struct ggml_tensor * ggml_gelu_quick_inplace(
2714
        struct ggml_context * ctx,
2715
0
        struct ggml_tensor  * a) {
2716
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2717
0
}
2718
2719
// ggml_silu
2720
2721
struct ggml_tensor * ggml_silu(
2722
        struct ggml_context * ctx,
2723
0
        struct ggml_tensor  * a) {
2724
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2725
0
}
2726
2727
struct ggml_tensor * ggml_silu_inplace(
2728
        struct ggml_context * ctx,
2729
0
        struct ggml_tensor  * a) {
2730
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2731
0
}
2732
2733
// ggml_xielu
2734
2735
struct ggml_tensor * ggml_xielu(
2736
        struct ggml_context * ctx,
2737
        struct ggml_tensor  * a,
2738
        float alpha_n,
2739
        float alpha_p,
2740
        float beta,
2741
0
        float eps) {
2742
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2743
2744
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2745
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2746
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2747
0
    ggml_set_op_params_f32(result, 3, beta);
2748
0
    ggml_set_op_params_f32(result, 4, eps);
2749
2750
0
    result->op     = GGML_OP_UNARY;
2751
0
    result->src[0] = a;
2752
2753
0
    return result;
2754
0
}
2755
2756
// ggml_silu_back
2757
2758
struct ggml_tensor * ggml_silu_back(
2759
        struct ggml_context * ctx,
2760
        struct ggml_tensor  * a,
2761
0
        struct ggml_tensor  * b) {
2762
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2763
2764
0
    result->op     = GGML_OP_SILU_BACK;
2765
0
    result->src[0] = a;
2766
0
    result->src[1] = b;
2767
2768
0
    return result;
2769
0
}
2770
2771
// ggml hardswish
2772
2773
struct ggml_tensor * ggml_hardswish(
2774
        struct ggml_context * ctx,
2775
0
        struct ggml_tensor  * a) {
2776
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2777
0
}
2778
2779
// ggml hardsigmoid
2780
2781
struct ggml_tensor * ggml_hardsigmoid(
2782
        struct ggml_context * ctx,
2783
0
        struct ggml_tensor  * a) {
2784
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2785
0
}
2786
2787
// ggml exp
2788
2789
struct ggml_tensor * ggml_exp(
2790
        struct ggml_context * ctx,
2791
0
        struct ggml_tensor  * a) {
2792
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2793
0
}
2794
2795
struct ggml_tensor * ggml_exp_inplace(
2796
        struct ggml_context * ctx,
2797
0
        struct ggml_tensor  * a) {
2798
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2799
0
}
2800
2801
// ggml_glu
2802
2803
static struct ggml_tensor * ggml_glu_impl(
2804
        struct ggml_context * ctx,
2805
        struct ggml_tensor  * a,
2806
        struct ggml_tensor  * b,
2807
        enum ggml_glu_op      op,
2808
0
        bool                  swapped) {
2809
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2810
2811
0
    if (b) {
2812
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2813
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2814
0
        GGML_ASSERT(a->type == b->type);
2815
0
    }
2816
2817
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2818
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2819
2820
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2821
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2822
2823
0
    result->op     = GGML_OP_GLU;
2824
0
    result->src[0] = a;
2825
0
    result->src[1] = b;
2826
2827
0
    return result;
2828
0
}
2829
2830
// ggml_floor
2831
2832
struct ggml_tensor * ggml_floor(
2833
        struct ggml_context * ctx,
2834
0
        struct ggml_tensor  * a) {
2835
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2836
0
}
2837
2838
struct ggml_tensor * ggml_floor_inplace(
2839
        struct ggml_context * ctx,
2840
0
        struct ggml_tensor  * a) {
2841
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2842
0
}
2843
2844
// ggml_ceil
2845
2846
struct ggml_tensor * ggml_ceil(
2847
        struct ggml_context * ctx,
2848
0
        struct ggml_tensor  * a) {
2849
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2850
0
}
2851
2852
struct ggml_tensor * ggml_ceil_inplace(
2853
        struct ggml_context * ctx,
2854
0
        struct ggml_tensor  * a) {
2855
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2856
0
}
2857
2858
//ggml_round
2859
2860
struct ggml_tensor * ggml_round(
2861
        struct ggml_context * ctx,
2862
0
        struct ggml_tensor  * a) {
2863
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2864
0
}
2865
2866
struct ggml_tensor * ggml_round_inplace(
2867
        struct ggml_context * ctx,
2868
0
        struct ggml_tensor  * a) {
2869
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2870
0
}
2871
2872
//ggml_trunc
2873
2874
struct ggml_tensor * ggml_trunc(
2875
        struct ggml_context * ctx,
2876
0
        struct ggml_tensor  * a) {
2877
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2878
0
}
2879
2880
struct ggml_tensor * ggml_trunc_inplace(
2881
        struct ggml_context * ctx,
2882
0
        struct ggml_tensor  * a) {
2883
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2884
0
}
2885
2886
struct ggml_tensor * ggml_glu(
2887
        struct ggml_context * ctx,
2888
        struct ggml_tensor  * a,
2889
        enum ggml_glu_op      op,
2890
0
        bool                  swapped) {
2891
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2892
0
}
2893
2894
struct ggml_tensor * ggml_glu_split(
2895
        struct ggml_context * ctx,
2896
        struct ggml_tensor  * a,
2897
        struct ggml_tensor  * b,
2898
0
        enum ggml_glu_op      op) {
2899
0
    return ggml_glu_impl(ctx, a, b, op, false);
2900
0
}
2901
2902
// ggml_reglu
2903
2904
struct ggml_tensor * ggml_reglu(
2905
        struct ggml_context * ctx,
2906
0
        struct ggml_tensor  * a) {
2907
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2908
0
}
2909
2910
struct ggml_tensor * ggml_reglu_swapped(
2911
        struct ggml_context * ctx,
2912
0
        struct ggml_tensor  * a) {
2913
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2914
0
}
2915
2916
struct ggml_tensor * ggml_reglu_split(
2917
        struct ggml_context * ctx,
2918
        struct ggml_tensor  * a,
2919
0
        struct ggml_tensor  * b) {
2920
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2921
0
}
2922
2923
// ggml_geglu
2924
2925
struct ggml_tensor * ggml_geglu(
2926
        struct ggml_context * ctx,
2927
0
        struct ggml_tensor  * a) {
2928
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2929
0
}
2930
2931
struct ggml_tensor * ggml_geglu_swapped(
2932
        struct ggml_context * ctx,
2933
0
        struct ggml_tensor  * a) {
2934
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2935
0
}
2936
2937
struct ggml_tensor * ggml_geglu_split(
2938
        struct ggml_context * ctx,
2939
        struct ggml_tensor  * a,
2940
0
        struct ggml_tensor  * b) {
2941
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2942
0
}
2943
2944
// ggml_swiglu
2945
2946
struct ggml_tensor * ggml_swiglu(
2947
        struct ggml_context * ctx,
2948
0
        struct ggml_tensor  * a) {
2949
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2950
0
}
2951
2952
struct ggml_tensor * ggml_swiglu_swapped(
2953
        struct ggml_context * ctx,
2954
0
        struct ggml_tensor  * a) {
2955
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2956
0
}
2957
2958
struct ggml_tensor * ggml_swiglu_split(
2959
        struct ggml_context * ctx,
2960
        struct ggml_tensor  * a,
2961
0
        struct ggml_tensor  * b) {
2962
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2963
0
}
2964
2965
// ggml_geglu_erf
2966
2967
struct ggml_tensor * ggml_geglu_erf(
2968
        struct ggml_context * ctx,
2969
0
        struct ggml_tensor  * a) {
2970
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2971
0
}
2972
2973
struct ggml_tensor * ggml_geglu_erf_swapped(
2974
        struct ggml_context * ctx,
2975
0
        struct ggml_tensor  * a) {
2976
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
2977
0
}
2978
2979
struct ggml_tensor * ggml_geglu_erf_split(
2980
        struct ggml_context * ctx,
2981
        struct ggml_tensor  * a,
2982
0
        struct ggml_tensor  * b) {
2983
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
2984
0
}
2985
2986
// ggml_geglu_quick
2987
2988
struct ggml_tensor * ggml_geglu_quick(
2989
        struct ggml_context * ctx,
2990
0
        struct ggml_tensor  * a) {
2991
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
2992
0
}
2993
2994
struct ggml_tensor * ggml_geglu_quick_swapped(
2995
        struct ggml_context * ctx,
2996
0
        struct ggml_tensor  * a) {
2997
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
2998
0
}
2999
3000
struct ggml_tensor * ggml_geglu_quick_split(
3001
        struct ggml_context * ctx,
3002
        struct ggml_tensor  * a,
3003
0
        struct ggml_tensor  * b) {
3004
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3005
0
}
3006
3007
struct ggml_tensor * ggml_swiglu_oai(
3008
        struct ggml_context * ctx,
3009
        struct ggml_tensor  * a,
3010
        struct ggml_tensor  * b,
3011
        float                 alpha,
3012
0
        float                 limit) {
3013
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3014
0
    ggml_set_op_params_f32(result, 2, alpha);
3015
0
    ggml_set_op_params_f32(result, 3, limit);
3016
3017
0
    return result;
3018
0
}
3019
3020
// ggml_norm
3021
3022
static struct ggml_tensor * ggml_norm_impl(
3023
        struct ggml_context * ctx,
3024
        struct ggml_tensor  * a,
3025
        float                 eps,
3026
0
        bool                  inplace) {
3027
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3028
3029
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3030
3031
0
    result->op     = GGML_OP_NORM;
3032
0
    result->src[0] = a;
3033
3034
0
    return result;
3035
0
}
3036
3037
struct ggml_tensor * ggml_norm(
3038
        struct ggml_context * ctx,
3039
        struct ggml_tensor  * a,
3040
0
        float                 eps) {
3041
0
    return ggml_norm_impl(ctx, a, eps, false);
3042
0
}
3043
3044
struct ggml_tensor * ggml_norm_inplace(
3045
        struct ggml_context * ctx,
3046
        struct ggml_tensor  * a,
3047
0
        float                 eps) {
3048
0
    return ggml_norm_impl(ctx, a, eps, true);
3049
0
}
3050
3051
// ggml_rms_norm
3052
3053
static struct ggml_tensor * ggml_rms_norm_impl(
3054
        struct ggml_context * ctx,
3055
        struct ggml_tensor  * a,
3056
        float                 eps,
3057
0
        bool                  inplace) {
3058
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3059
3060
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3061
3062
0
    result->op     = GGML_OP_RMS_NORM;
3063
0
    result->src[0] = a;
3064
3065
0
    return result;
3066
0
}
3067
3068
struct ggml_tensor * ggml_rms_norm(
3069
        struct ggml_context * ctx,
3070
        struct ggml_tensor  * a,
3071
0
        float                 eps) {
3072
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3073
0
}
3074
3075
struct ggml_tensor * ggml_rms_norm_inplace(
3076
        struct ggml_context * ctx,
3077
        struct ggml_tensor  * a,
3078
0
        float                 eps) {
3079
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3080
0
}
3081
3082
// ggml_rms_norm_back
3083
3084
struct ggml_tensor * ggml_rms_norm_back(
3085
        struct ggml_context * ctx,
3086
        struct ggml_tensor  * a,
3087
        struct ggml_tensor  * b,
3088
0
        float                 eps) {
3089
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3090
3091
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3092
3093
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3094
0
    result->src[0] = a;
3095
0
    result->src[1] = b;
3096
3097
0
    return result;
3098
0
}
3099
3100
// ggml_group_norm
3101
3102
static struct ggml_tensor * ggml_group_norm_impl(
3103
        struct ggml_context * ctx,
3104
        struct ggml_tensor  * a,
3105
        int                   n_groups,
3106
        float                 eps,
3107
0
        bool                  inplace) {
3108
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3109
3110
0
    ggml_set_op_params_i32(result, 0, n_groups);
3111
0
    ggml_set_op_params_f32(result, 1, eps);
3112
3113
0
    result->op     = GGML_OP_GROUP_NORM;
3114
0
    result->src[0] = a;
3115
3116
0
    return result;
3117
0
}
3118
3119
struct ggml_tensor * ggml_group_norm(
3120
        struct ggml_context * ctx,
3121
        struct ggml_tensor  * a,
3122
        int                   n_groups,
3123
0
        float                 eps) {
3124
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3125
0
}
3126
3127
struct ggml_tensor * ggml_group_norm_inplace(
3128
        struct ggml_context * ctx,
3129
        struct ggml_tensor  * a,
3130
        int                   n_groups,
3131
0
        float                 eps) {
3132
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3133
0
}
3134
3135
// ggml_l2_norm
3136
3137
static struct ggml_tensor * ggml_l2_norm_impl(
3138
        struct ggml_context * ctx,
3139
        struct ggml_tensor  * a,
3140
        float                 eps,
3141
0
        bool                  inplace) {
3142
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3143
3144
0
    ggml_set_op_params_f32(result, 0, eps);
3145
3146
0
    result->op     = GGML_OP_L2_NORM;
3147
0
    result->src[0] = a;
3148
3149
0
    return result;
3150
0
}
3151
3152
struct ggml_tensor * ggml_l2_norm(
3153
        struct ggml_context * ctx,
3154
        struct ggml_tensor  * a,
3155
0
        float                 eps) {
3156
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3157
0
}
3158
3159
struct ggml_tensor * ggml_l2_norm_inplace(
3160
        struct ggml_context * ctx,
3161
        struct ggml_tensor  * a,
3162
0
        float                 eps) {
3163
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3164
0
}
3165
3166
// ggml_mul_mat
3167
3168
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3169
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3170
3171
0
    return (t0->ne[0]           == t1->ne[0])  &&
3172
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3173
0
           (t1->ne[3]%t0->ne[3] == 0);
3174
0
}
3175
3176
struct ggml_tensor * ggml_mul_mat(
3177
        struct ggml_context * ctx,
3178
        struct ggml_tensor  * a,
3179
0
        struct ggml_tensor  * b) {
3180
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3181
0
    GGML_ASSERT(!ggml_is_transposed(a));
3182
3183
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3184
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3185
3186
0
    result->op     = GGML_OP_MUL_MAT;
3187
0
    result->src[0] = a;
3188
0
    result->src[1] = b;
3189
3190
0
    return result;
3191
0
}
3192
3193
void ggml_mul_mat_set_prec(
3194
        struct ggml_tensor * a,
3195
0
        enum ggml_prec       prec) {
3196
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3197
3198
0
    const int32_t prec_i32 = (int32_t) prec;
3199
3200
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3201
0
}
3202
3203
// ggml_mul_mat_id
3204
3205
/*
3206
    c = ggml_mul_mat_id(ctx, as, b, ids);
3207
3208
    as  -> [cols, rows, n_expert]
3209
    b   -> [cols, n_expert_used, n_tokens]
3210
    ids -> [n_expert_used, n_tokens] (i32)
3211
    c   -> [rows, n_expert_used, n_tokens]
3212
3213
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3214
3215
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3216
*/
3217
struct ggml_tensor * ggml_mul_mat_id(
3218
        struct ggml_context * ctx,
3219
        struct ggml_tensor  * as,
3220
        struct ggml_tensor  * b,
3221
0
        struct ggml_tensor  * ids) {
3222
0
    GGML_ASSERT(!ggml_is_transposed(as));
3223
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3224
3225
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3226
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3227
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3228
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3229
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3230
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3231
3232
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3233
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3234
3235
0
    result->op     = GGML_OP_MUL_MAT_ID;
3236
0
    result->src[0] = as;
3237
0
    result->src[1] = b;
3238
0
    result->src[2] = ids;
3239
3240
0
    return result;
3241
0
}
3242
3243
// ggml_out_prod
3244
3245
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3246
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3247
3248
0
    return (t0->ne[1] == t1->ne[1])   &&
3249
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3250
0
           (t1->ne[3]%t0->ne[3] == 0);
3251
0
}
3252
3253
struct ggml_tensor * ggml_out_prod(
3254
        struct ggml_context * ctx,
3255
        struct ggml_tensor  * a,
3256
0
        struct ggml_tensor  * b) {
3257
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3258
0
    GGML_ASSERT(!ggml_is_transposed(a));
3259
3260
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3261
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3262
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3263
3264
0
    result->op     = GGML_OP_OUT_PROD;
3265
0
    result->src[0] = a;
3266
0
    result->src[1] = b;
3267
3268
0
    return result;
3269
0
}
3270
3271
// ggml_scale
3272
3273
static struct ggml_tensor * ggml_scale_impl(
3274
        struct ggml_context * ctx,
3275
        struct ggml_tensor  * a,
3276
        float                 s,
3277
        float                 b,
3278
0
        bool                  inplace) {
3279
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3280
3281
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3282
3283
0
    float params[2] = { s, b };
3284
0
    ggml_set_op_params(result, &params, sizeof(params));
3285
3286
0
    result->op     = GGML_OP_SCALE;
3287
0
    result->src[0] = a;
3288
3289
0
    return result;
3290
0
}
3291
3292
struct ggml_tensor * ggml_scale(
3293
        struct ggml_context * ctx,
3294
        struct ggml_tensor  * a,
3295
0
        float                 s) {
3296
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3297
0
}
3298
3299
struct ggml_tensor * ggml_scale_inplace(
3300
        struct ggml_context * ctx,
3301
        struct ggml_tensor  * a,
3302
0
        float                 s) {
3303
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3304
0
}
3305
3306
struct ggml_tensor * ggml_scale_bias(
3307
        struct ggml_context * ctx,
3308
        struct ggml_tensor  * a,
3309
        float                 s,
3310
0
        float                 b) {
3311
0
    return ggml_scale_impl(ctx, a, s, b, false);
3312
0
}
3313
3314
struct ggml_tensor * ggml_scale_bias_inplace(
3315
        struct ggml_context * ctx,
3316
        struct ggml_tensor  * a,
3317
        float                 s,
3318
0
        float                 b) {
3319
0
    return ggml_scale_impl(ctx, a, s, b, true);
3320
0
}
3321
3322
// ggml_set
3323
3324
static struct ggml_tensor * ggml_set_impl(
3325
        struct ggml_context * ctx,
3326
        struct ggml_tensor  * a,
3327
        struct ggml_tensor  * b,
3328
        size_t                nb1,
3329
        size_t                nb2,
3330
        size_t                nb3,
3331
        size_t                offset,
3332
0
        bool                  inplace) {
3333
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3334
3335
    // make a view of the destination
3336
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3337
3338
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3339
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3340
0
    ggml_set_op_params(result, params, sizeof(params));
3341
3342
0
    result->op     = GGML_OP_SET;
3343
0
    result->src[0] = a;
3344
0
    result->src[1] = b;
3345
3346
0
    return result;
3347
0
}
3348
3349
struct ggml_tensor * ggml_set(
3350
        struct ggml_context * ctx,
3351
        struct ggml_tensor  * a,
3352
        struct ggml_tensor  * b,
3353
        size_t                nb1,
3354
        size_t                nb2,
3355
        size_t                nb3,
3356
0
        size_t                offset) {
3357
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3358
0
}
3359
3360
struct ggml_tensor * ggml_set_inplace(
3361
        struct ggml_context * ctx,
3362
        struct ggml_tensor  * a,
3363
        struct ggml_tensor  * b,
3364
        size_t                nb1,
3365
        size_t                nb2,
3366
        size_t                nb3,
3367
0
        size_t                offset) {
3368
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3369
0
}
3370
3371
struct ggml_tensor * ggml_set_1d(
3372
        struct ggml_context * ctx,
3373
        struct ggml_tensor  * a,
3374
        struct ggml_tensor  * b,
3375
0
        size_t                offset) {
3376
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3377
0
}
3378
3379
struct ggml_tensor * ggml_set_1d_inplace(
3380
        struct ggml_context * ctx,
3381
        struct ggml_tensor  * a,
3382
        struct ggml_tensor  * b,
3383
0
        size_t                offset) {
3384
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3385
0
}
3386
3387
struct ggml_tensor * ggml_set_2d(
3388
        struct ggml_context * ctx,
3389
        struct ggml_tensor  * a,
3390
        struct ggml_tensor  * b,
3391
        size_t                nb1,
3392
0
        size_t                offset) {
3393
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3394
0
}
3395
3396
struct ggml_tensor * ggml_set_2d_inplace(
3397
        struct ggml_context * ctx,
3398
        struct ggml_tensor  * a,
3399
        struct ggml_tensor  * b,
3400
        size_t                nb1,
3401
0
        size_t                offset) {
3402
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3403
0
}
3404
3405
// ggml_cpy
3406
3407
static struct ggml_tensor * ggml_cpy_impl(
3408
        struct ggml_context * ctx,
3409
        struct ggml_tensor  * a,
3410
0
        struct ggml_tensor  * b) {
3411
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3412
3413
    // make a view of the destination
3414
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3415
0
    if (strlen(b->name) > 0) {
3416
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3417
0
    } else {
3418
0
        ggml_format_name(result, "%s (copy)", a->name);
3419
0
    }
3420
3421
0
    result->op     = GGML_OP_CPY;
3422
0
    result->src[0] = a;
3423
0
    result->src[1] = b;
3424
3425
0
    return result;
3426
0
}
3427
3428
struct ggml_tensor * ggml_cpy(
3429
        struct ggml_context * ctx,
3430
        struct ggml_tensor * a,
3431
0
        struct ggml_tensor * b) {
3432
0
    return ggml_cpy_impl(ctx, a, b);
3433
0
}
3434
3435
struct ggml_tensor * ggml_cast(
3436
        struct ggml_context * ctx,
3437
        struct ggml_tensor  * a,
3438
0
        enum   ggml_type      type) {
3439
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3440
0
    ggml_format_name(result, "%s (copy)", a->name);
3441
3442
0
    result->op     = GGML_OP_CPY;
3443
0
    result->src[0] = a;
3444
0
    result->src[1] = result;
3445
3446
0
    return result;
3447
0
}
3448
3449
// ggml_cont
3450
3451
static struct ggml_tensor * ggml_cont_impl(
3452
        struct ggml_context * ctx,
3453
0
        struct ggml_tensor  * a) {
3454
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3455
0
    ggml_format_name(result, "%s (cont)", a->name);
3456
3457
0
    result->op     = GGML_OP_CONT;
3458
0
    result->src[0] = a;
3459
3460
0
    return result;
3461
0
}
3462
3463
struct ggml_tensor * ggml_cont(
3464
        struct ggml_context * ctx,
3465
0
        struct ggml_tensor * a) {
3466
0
    return ggml_cont_impl(ctx, a);
3467
0
}
3468
3469
// make contiguous, with new shape
3470
GGML_API struct ggml_tensor * ggml_cont_1d(
3471
        struct ggml_context * ctx,
3472
        struct ggml_tensor  * a,
3473
0
        int64_t               ne0) {
3474
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3475
0
}
3476
3477
GGML_API struct ggml_tensor * ggml_cont_2d(
3478
        struct ggml_context * ctx,
3479
        struct ggml_tensor  * a,
3480
        int64_t               ne0,
3481
0
        int64_t               ne1) {
3482
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3483
0
}
3484
3485
GGML_API struct ggml_tensor * ggml_cont_3d(
3486
        struct ggml_context * ctx,
3487
        struct ggml_tensor  * a,
3488
        int64_t               ne0,
3489
        int64_t               ne1,
3490
0
        int64_t               ne2) {
3491
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3492
0
}
3493
3494
struct ggml_tensor * ggml_cont_4d(
3495
        struct ggml_context * ctx,
3496
        struct ggml_tensor  * a,
3497
        int64_t               ne0,
3498
        int64_t               ne1,
3499
        int64_t               ne2,
3500
0
        int64_t               ne3) {
3501
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3502
3503
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3504
0
    ggml_format_name(result, "%s (cont)", a->name);
3505
3506
0
    result->op     = GGML_OP_CONT;
3507
0
    result->src[0] = a;
3508
3509
0
    return result;
3510
0
}
3511
3512
// ggml_reshape
3513
3514
struct ggml_tensor * ggml_reshape(
3515
        struct ggml_context * ctx,
3516
        struct ggml_tensor * a,
3517
0
        struct ggml_tensor * b) {
3518
0
    GGML_ASSERT(ggml_is_contiguous(a));
3519
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
3520
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3521
3522
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3523
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3524
3525
0
    result->op     = GGML_OP_RESHAPE;
3526
0
    result->src[0] = a;
3527
3528
0
    return result;
3529
0
}
3530
3531
struct ggml_tensor * ggml_reshape_1d(
3532
        struct ggml_context * ctx,
3533
        struct ggml_tensor  * a,
3534
0
        int64_t               ne0) {
3535
0
    GGML_ASSERT(ggml_is_contiguous(a));
3536
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3537
3538
0
    const int64_t ne[1] = { ne0 };
3539
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3540
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3541
3542
0
    result->op     = GGML_OP_RESHAPE;
3543
0
    result->src[0] = a;
3544
3545
0
    return result;
3546
0
}
3547
3548
struct ggml_tensor * ggml_reshape_2d(
3549
        struct ggml_context * ctx,
3550
        struct ggml_tensor  * a,
3551
        int64_t               ne0,
3552
0
        int64_t               ne1) {
3553
0
    GGML_ASSERT(ggml_is_contiguous(a));
3554
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3555
3556
0
    const int64_t ne[2] = { ne0, ne1 };
3557
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3558
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3559
3560
0
    result->op     = GGML_OP_RESHAPE;
3561
0
    result->src[0] = a;
3562
3563
0
    return result;
3564
0
}
3565
3566
struct ggml_tensor * ggml_reshape_3d(
3567
        struct ggml_context * ctx,
3568
        struct ggml_tensor  * a,
3569
        int64_t               ne0,
3570
        int64_t               ne1,
3571
0
        int64_t               ne2) {
3572
0
    GGML_ASSERT(ggml_is_contiguous(a));
3573
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3574
3575
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3576
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3577
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3578
3579
0
    result->op     = GGML_OP_RESHAPE;
3580
0
    result->src[0] = a;
3581
3582
0
    return result;
3583
0
}
3584
3585
struct ggml_tensor * ggml_reshape_4d(
3586
        struct ggml_context * ctx,
3587
        struct ggml_tensor  * a,
3588
        int64_t               ne0,
3589
        int64_t               ne1,
3590
        int64_t               ne2,
3591
0
        int64_t               ne3) {
3592
0
    GGML_ASSERT(ggml_is_contiguous(a));
3593
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3594
3595
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3596
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3597
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3598
3599
0
    result->op     = GGML_OP_RESHAPE;
3600
0
    result->src[0] = a;
3601
3602
0
    return result;
3603
0
}
3604
3605
static struct ggml_tensor * ggml_view_impl(
3606
        struct ggml_context * ctx,
3607
        struct ggml_tensor  * a,
3608
        int                   n_dims,
3609
        const int64_t       * ne,
3610
0
        size_t                offset) {
3611
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3612
0
    ggml_format_name(result, "%s (view)", a->name);
3613
3614
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3615
3616
0
    result->op     = GGML_OP_VIEW;
3617
0
    result->src[0] = a;
3618
3619
0
    return result;
3620
0
}
3621
3622
// ggml_view_1d
3623
3624
struct ggml_tensor * ggml_view_1d(
3625
        struct ggml_context * ctx,
3626
        struct ggml_tensor  * a,
3627
        int64_t               ne0,
3628
0
        size_t                offset) {
3629
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3630
3631
0
    return result;
3632
0
}
3633
3634
// ggml_view_2d
3635
3636
struct ggml_tensor * ggml_view_2d(
3637
        struct ggml_context * ctx,
3638
        struct ggml_tensor  * a,
3639
        int64_t               ne0,
3640
        int64_t               ne1,
3641
        size_t                nb1,
3642
0
        size_t                offset) {
3643
0
    const int64_t ne[2] = { ne0, ne1 };
3644
3645
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3646
3647
0
    result->nb[1] = nb1;
3648
0
    result->nb[2] = result->nb[1]*ne1;
3649
0
    result->nb[3] = result->nb[2];
3650
3651
0
    return result;
3652
0
}
3653
3654
// ggml_view_3d
3655
3656
struct ggml_tensor * ggml_view_3d(
3657
        struct ggml_context * ctx,
3658
        struct ggml_tensor  * a,
3659
        int64_t               ne0,
3660
        int64_t               ne1,
3661
        int64_t               ne2,
3662
        size_t                nb1,
3663
        size_t                nb2,
3664
0
        size_t                offset) {
3665
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3666
3667
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3668
3669
0
    result->nb[1] = nb1;
3670
0
    result->nb[2] = nb2;
3671
0
    result->nb[3] = result->nb[2]*ne2;
3672
3673
0
    return result;
3674
0
}
3675
3676
// ggml_view_4d
3677
3678
struct ggml_tensor * ggml_view_4d(
3679
        struct ggml_context * ctx,
3680
        struct ggml_tensor  * a,
3681
        int64_t               ne0,
3682
        int64_t               ne1,
3683
        int64_t               ne2,
3684
        int64_t               ne3,
3685
        size_t                nb1,
3686
        size_t                nb2,
3687
        size_t                nb3,
3688
0
        size_t                offset) {
3689
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3690
3691
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3692
3693
0
    result->nb[1] = nb1;
3694
0
    result->nb[2] = nb2;
3695
0
    result->nb[3] = nb3;
3696
3697
0
    return result;
3698
0
}
3699
3700
// ggml_permute
3701
3702
struct ggml_tensor * ggml_permute(
3703
        struct ggml_context * ctx,
3704
        struct ggml_tensor  * a,
3705
        int                   axis0,
3706
        int                   axis1,
3707
        int                   axis2,
3708
0
        int                   axis3) {
3709
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3710
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3711
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3712
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3713
3714
0
    GGML_ASSERT(axis0 != axis1);
3715
0
    GGML_ASSERT(axis0 != axis2);
3716
0
    GGML_ASSERT(axis0 != axis3);
3717
0
    GGML_ASSERT(axis1 != axis2);
3718
0
    GGML_ASSERT(axis1 != axis3);
3719
0
    GGML_ASSERT(axis2 != axis3);
3720
3721
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3722
0
    ggml_format_name(result, "%s (permuted)", a->name);
3723
3724
0
    int ne[GGML_MAX_DIMS];
3725
0
    int nb[GGML_MAX_DIMS];
3726
3727
0
    ne[axis0] = a->ne[0];
3728
0
    ne[axis1] = a->ne[1];
3729
0
    ne[axis2] = a->ne[2];
3730
0
    ne[axis3] = a->ne[3];
3731
3732
0
    nb[axis0] = a->nb[0];
3733
0
    nb[axis1] = a->nb[1];
3734
0
    nb[axis2] = a->nb[2];
3735
0
    nb[axis3] = a->nb[3];
3736
3737
0
    result->ne[0] = ne[0];
3738
0
    result->ne[1] = ne[1];
3739
0
    result->ne[2] = ne[2];
3740
0
    result->ne[3] = ne[3];
3741
3742
0
    result->nb[0] = nb[0];
3743
0
    result->nb[1] = nb[1];
3744
0
    result->nb[2] = nb[2];
3745
0
    result->nb[3] = nb[3];
3746
3747
0
    result->op     = GGML_OP_PERMUTE;
3748
0
    result->src[0] = a;
3749
3750
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3751
0
    ggml_set_op_params(result, params, sizeof(params));
3752
3753
0
    return result;
3754
0
}
3755
3756
// ggml_transpose
3757
3758
struct ggml_tensor * ggml_transpose(
3759
        struct ggml_context * ctx,
3760
0
        struct ggml_tensor  * a) {
3761
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3762
0
    ggml_format_name(result, "%s (transposed)", a->name);
3763
3764
0
    result->ne[0] = a->ne[1];
3765
0
    result->ne[1] = a->ne[0];
3766
3767
0
    result->nb[0] = a->nb[1];
3768
0
    result->nb[1] = a->nb[0];
3769
3770
0
    result->op     = GGML_OP_TRANSPOSE;
3771
0
    result->src[0] = a;
3772
3773
0
    return result;
3774
0
}
3775
3776
// ggml_get_rows
3777
3778
struct ggml_tensor * ggml_get_rows(
3779
        struct ggml_context * ctx,
3780
        struct ggml_tensor  * a,
3781
0
        struct ggml_tensor  * b) {
3782
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3783
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3784
0
    GGML_ASSERT(b->ne[3] == 1);
3785
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3786
3787
    // TODO: implement non F32 return
3788
0
    enum ggml_type type = GGML_TYPE_F32;
3789
0
    if (a->type == GGML_TYPE_I32) {
3790
0
        type = a->type;
3791
0
    }
3792
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3793
3794
0
    result->op     = GGML_OP_GET_ROWS;
3795
0
    result->src[0] = a;
3796
0
    result->src[1] = b;
3797
3798
0
    return result;
3799
0
}
3800
3801
// ggml_get_rows_back
3802
3803
struct ggml_tensor * ggml_get_rows_back(
3804
        struct ggml_context * ctx,
3805
        struct ggml_tensor  * a,
3806
        struct ggml_tensor  * b,
3807
0
        struct ggml_tensor  * c) {
3808
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3809
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3810
3811
    // TODO: implement non F32 return
3812
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3813
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3814
3815
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3816
0
    result->src[0] = a;
3817
0
    result->src[1] = b;
3818
3819
0
    return result;
3820
0
}
3821
3822
// ggml_set_rows
3823
3824
struct ggml_tensor * ggml_set_rows(
3825
        struct ggml_context * ctx,
3826
        struct ggml_tensor  * a,
3827
        struct ggml_tensor  * b,
3828
0
        struct ggml_tensor  * c) {
3829
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3830
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3831
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3832
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3833
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3834
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3835
0
    GGML_ASSERT(c->ne[3] == 1);
3836
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3837
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3838
3839
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3840
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3841
3842
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3843
3844
0
    result->op     = GGML_OP_SET_ROWS;
3845
0
    result->src[0] = b;
3846
0
    result->src[1] = c;
3847
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3848
3849
0
    return result;
3850
0
}
3851
3852
// ggml_diag
3853
3854
struct ggml_tensor * ggml_diag(
3855
        struct ggml_context * ctx,
3856
0
        struct ggml_tensor  * a) {
3857
0
    GGML_ASSERT(a->ne[1] == 1);
3858
3859
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3860
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3861
3862
0
    result->op     = GGML_OP_DIAG;
3863
0
    result->src[0] = a;
3864
3865
0
    return result;
3866
0
}
3867
3868
// ggml_diag_mask_inf
3869
3870
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3871
        struct ggml_context * ctx,
3872
        struct ggml_tensor  * a,
3873
        int                   n_past,
3874
0
        bool                  inplace) {
3875
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3876
3877
0
    int32_t params[] = { n_past };
3878
0
    ggml_set_op_params(result, params, sizeof(params));
3879
3880
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3881
0
    result->src[0] = a;
3882
3883
0
    return result;
3884
0
}
3885
3886
struct ggml_tensor * ggml_diag_mask_inf(
3887
        struct ggml_context * ctx,
3888
        struct ggml_tensor  * a,
3889
0
        int                   n_past) {
3890
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3891
0
}
3892
3893
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3894
        struct ggml_context * ctx,
3895
        struct ggml_tensor  * a,
3896
0
        int                   n_past) {
3897
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3898
0
}
3899
3900
// ggml_diag_mask_zero
3901
3902
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3903
        struct ggml_context * ctx,
3904
        struct ggml_tensor  * a,
3905
        int                   n_past,
3906
0
        bool                  inplace) {
3907
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3908
3909
0
    int32_t params[] = { n_past };
3910
0
    ggml_set_op_params(result, params, sizeof(params));
3911
3912
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3913
0
    result->src[0] = a;
3914
3915
0
    return result;
3916
0
}
3917
3918
struct ggml_tensor * ggml_diag_mask_zero(
3919
        struct ggml_context * ctx,
3920
        struct ggml_tensor  * a,
3921
0
        int                   n_past) {
3922
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3923
0
}
3924
3925
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3926
        struct ggml_context * ctx,
3927
        struct ggml_tensor  * a,
3928
0
        int                   n_past) {
3929
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3930
0
}
3931
3932
// ggml_soft_max
3933
3934
static struct ggml_tensor * ggml_soft_max_impl(
3935
        struct ggml_context * ctx,
3936
        struct ggml_tensor  * a,
3937
        struct ggml_tensor  * mask,
3938
        float                 scale,
3939
        float                 max_bias,
3940
0
        bool                  inplace) {
3941
0
    GGML_ASSERT(ggml_is_contiguous(a));
3942
3943
0
    if (mask) {
3944
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3945
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3946
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3947
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3948
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3949
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3950
0
    }
3951
3952
0
    if (max_bias > 0.0f) {
3953
0
        GGML_ASSERT(mask);
3954
0
    }
3955
3956
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3957
3958
0
    float params[] = { scale, max_bias };
3959
0
    ggml_set_op_params(result, params, sizeof(params));
3960
3961
0
    result->op     = GGML_OP_SOFT_MAX;
3962
0
    result->src[0] = a;
3963
0
    result->src[1] = mask;
3964
3965
0
    return result;
3966
0
}
3967
3968
struct ggml_tensor * ggml_soft_max(
3969
        struct ggml_context * ctx,
3970
0
        struct ggml_tensor  * a) {
3971
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
3972
0
}
3973
3974
struct ggml_tensor * ggml_soft_max_inplace(
3975
        struct ggml_context * ctx,
3976
0
        struct ggml_tensor  * a) {
3977
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
3978
0
}
3979
3980
struct ggml_tensor * ggml_soft_max_ext(
3981
        struct ggml_context * ctx,
3982
        struct ggml_tensor  * a,
3983
        struct ggml_tensor  * mask,
3984
        float                 scale,
3985
0
        float                 max_bias) {
3986
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
3987
0
}
3988
3989
struct ggml_tensor * ggml_soft_max_ext_inplace(
3990
        struct ggml_context * ctx,
3991
        struct ggml_tensor  * a,
3992
        struct ggml_tensor  * mask,
3993
        float                 scale,
3994
0
        float                 max_bias) {
3995
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
3996
0
}
3997
3998
void ggml_soft_max_add_sinks(
3999
        struct ggml_tensor * a,
4000
0
        struct ggml_tensor * sinks) {
4001
0
    if (!sinks) {
4002
0
        a->src[2] = NULL;
4003
0
        return;
4004
0
    }
4005
4006
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4007
0
    GGML_ASSERT(a->src[2] == NULL);
4008
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4009
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4010
4011
0
    a->src[2] = sinks;
4012
0
}
4013
4014
// ggml_soft_max_ext_back
4015
4016
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4017
        struct ggml_context * ctx,
4018
        struct ggml_tensor  * a,
4019
        struct ggml_tensor  * b,
4020
        float                 scale,
4021
        float                 max_bias,
4022
0
        bool                  inplace) {
4023
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4024
4025
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4026
0
    result->src[0] = a;
4027
0
    result->src[1] = b;
4028
4029
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4030
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4031
4032
0
    return result;
4033
0
}
4034
4035
struct ggml_tensor * ggml_soft_max_ext_back(
4036
        struct ggml_context * ctx,
4037
        struct ggml_tensor  * a,
4038
        struct ggml_tensor  * b,
4039
        float                 scale,
4040
0
        float                 max_bias) {
4041
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4042
0
}
4043
4044
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4045
        struct ggml_context * ctx,
4046
        struct ggml_tensor  * a,
4047
        struct ggml_tensor  * b,
4048
        float                 scale,
4049
0
        float                 max_bias) {
4050
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4051
0
}
4052
4053
// ggml_rope
4054
4055
static struct ggml_tensor * ggml_rope_impl(
4056
        struct ggml_context * ctx,
4057
        struct ggml_tensor  * a,
4058
        struct ggml_tensor  * b,
4059
        struct ggml_tensor  * c,
4060
        int                   n_dims,
4061
        int                   sections[GGML_MROPE_SECTIONS],
4062
        int                   mode,
4063
        int                   n_ctx_orig,
4064
        float                 freq_base,
4065
        float                 freq_scale,
4066
        float                 ext_factor,
4067
        float                 attn_factor,
4068
        float                 beta_fast,
4069
        float                 beta_slow,
4070
0
        bool                  inplace) {
4071
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4072
4073
0
    GGML_ASSERT(ggml_is_vector(b));
4074
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4075
4076
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4077
0
    if (mrope_used) {
4078
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
4079
0
    } else {
4080
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4081
0
    }
4082
4083
0
    if (c) {
4084
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4085
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4086
0
    }
4087
4088
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4089
4090
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4091
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4092
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4093
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4094
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4095
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4096
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4097
0
    if (mrope_used && sections) {
4098
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4099
0
    } else {
4100
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4101
0
    }
4102
0
    ggml_set_op_params(result, params, sizeof(params));
4103
4104
0
    result->op     = GGML_OP_ROPE;
4105
0
    result->src[0] = a;
4106
0
    result->src[1] = b;
4107
0
    result->src[2] = c;
4108
4109
0
    return result;
4110
0
}
4111
4112
struct ggml_tensor * ggml_rope(
4113
        struct ggml_context * ctx,
4114
        struct ggml_tensor  * a,
4115
        struct ggml_tensor  * b,
4116
        int                   n_dims,
4117
0
        int                   mode) {
4118
0
    return ggml_rope_impl(
4119
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4120
0
    );
4121
0
}
4122
4123
struct ggml_tensor * ggml_rope_multi(
4124
        struct ggml_context * ctx,
4125
        struct ggml_tensor  * a,
4126
        struct ggml_tensor  * b,
4127
        struct ggml_tensor  * c,
4128
        int                   n_dims,
4129
        int                   sections[GGML_MROPE_SECTIONS],
4130
        int                   mode,
4131
        int                   n_ctx_orig,
4132
        float                 freq_base,
4133
        float                 freq_scale,
4134
        float                 ext_factor,
4135
        float                 attn_factor,
4136
        float                 beta_fast,
4137
0
        float                 beta_slow) {
4138
0
    return ggml_rope_impl(
4139
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4140
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4141
0
    );
4142
0
}
4143
4144
struct ggml_tensor * ggml_rope_multi_inplace(
4145
        struct ggml_context * ctx,
4146
        struct ggml_tensor  * a,
4147
        struct ggml_tensor  * b,
4148
        struct ggml_tensor  * c,
4149
        int                   n_dims,
4150
        int                   sections[GGML_MROPE_SECTIONS],
4151
        int                   mode,
4152
        int                   n_ctx_orig,
4153
        float                 freq_base,
4154
        float                 freq_scale,
4155
        float                 ext_factor,
4156
        float                 attn_factor,
4157
        float                 beta_fast,
4158
0
        float                 beta_slow) {
4159
0
    return ggml_rope_impl(
4160
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4161
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4162
0
    );
4163
0
}
4164
4165
struct ggml_tensor * ggml_rope_inplace(
4166
        struct ggml_context * ctx,
4167
        struct ggml_tensor  * a,
4168
        struct ggml_tensor  * b,
4169
        int                   n_dims,
4170
0
        int                   mode) {
4171
0
    return ggml_rope_impl(
4172
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4173
0
    );
4174
0
}
4175
4176
struct ggml_tensor * ggml_rope_ext(
4177
        struct ggml_context * ctx,
4178
        struct ggml_tensor  * a,
4179
        struct ggml_tensor  * b,
4180
        struct ggml_tensor  * c,
4181
        int                   n_dims,
4182
        int                   mode,
4183
        int                   n_ctx_orig,
4184
        float                 freq_base,
4185
        float                 freq_scale,
4186
        float                 ext_factor,
4187
        float                 attn_factor,
4188
        float                 beta_fast,
4189
0
        float                 beta_slow) {
4190
0
    return ggml_rope_impl(
4191
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4192
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4193
0
    );
4194
0
}
4195
4196
struct ggml_tensor * ggml_rope_ext_inplace(
4197
        struct ggml_context * ctx,
4198
        struct ggml_tensor  * a,
4199
        struct ggml_tensor  * b,
4200
        struct ggml_tensor  * c,
4201
        int                   n_dims,
4202
        int                   mode,
4203
        int                   n_ctx_orig,
4204
        float                 freq_base,
4205
        float                 freq_scale,
4206
        float                 ext_factor,
4207
        float                 attn_factor,
4208
        float                 beta_fast,
4209
0
        float                 beta_slow) {
4210
0
    return ggml_rope_impl(
4211
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4212
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4213
0
    );
4214
0
}
4215
4216
struct ggml_tensor * ggml_rope_custom(
4217
        struct ggml_context * ctx,
4218
        struct ggml_tensor  * a,
4219
        struct ggml_tensor  * b,
4220
        int                   n_dims,
4221
        int                   mode,
4222
        int                   n_ctx_orig,
4223
        float                 freq_base,
4224
        float                 freq_scale,
4225
        float                 ext_factor,
4226
        float                 attn_factor,
4227
        float                 beta_fast,
4228
0
        float                 beta_slow) {
4229
0
    return ggml_rope_impl(
4230
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4231
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4232
0
    );
4233
0
}
4234
4235
struct ggml_tensor * ggml_rope_custom_inplace(
4236
        struct ggml_context * ctx,
4237
        struct ggml_tensor  * a,
4238
        struct ggml_tensor  * b,
4239
        int                   n_dims,
4240
        int                   mode,
4241
        int                   n_ctx_orig,
4242
        float                 freq_base,
4243
        float                 freq_scale,
4244
        float                 ext_factor,
4245
        float                 attn_factor,
4246
        float                 beta_fast,
4247
0
        float                 beta_slow) {
4248
0
    return ggml_rope_impl(
4249
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4250
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4251
0
    );
4252
0
}
4253
4254
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4255
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4256
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4257
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4258
0
}
4259
4260
void ggml_rope_yarn_corr_dims(
4261
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4262
0
) {
4263
    // start and end correction dims
4264
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4265
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4266
0
    dims[0] = MAX(0, start);
4267
0
    dims[1] = MIN(n_dims - 1, end);
4268
0
}
4269
4270
// ggml_rope_back
4271
4272
struct ggml_tensor * ggml_rope_ext_back(
4273
        struct ggml_context * ctx,
4274
        struct ggml_tensor  * a,
4275
        struct ggml_tensor  * b,
4276
        struct ggml_tensor  * c,
4277
        int                   n_dims,
4278
        int                   mode,
4279
        int                   n_ctx_orig,
4280
        float                 freq_base,
4281
        float                 freq_scale,
4282
        float                 ext_factor,
4283
        float                 attn_factor,
4284
        float                 beta_fast,
4285
0
        float                 beta_slow) {
4286
0
    struct ggml_tensor * result = ggml_rope_ext(
4287
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4288
0
    result->op = GGML_OP_ROPE_BACK;
4289
0
    return result;
4290
0
}
4291
4292
struct ggml_tensor * ggml_rope_multi_back(
4293
        struct ggml_context * ctx,
4294
        struct ggml_tensor  * a,
4295
        struct ggml_tensor  * b,
4296
        struct ggml_tensor  * c,
4297
        int                   n_dims,
4298
        int                   sections[4],
4299
        int                   mode,
4300
        int                   n_ctx_orig,
4301
        float                 freq_base,
4302
        float                 freq_scale,
4303
        float                 ext_factor,
4304
        float                 attn_factor,
4305
        float                 beta_fast,
4306
0
        float                 beta_slow) {
4307
0
    struct ggml_tensor * result = ggml_rope_multi(
4308
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4309
0
    result->op = GGML_OP_ROPE_BACK;
4310
0
    return result;
4311
0
}
4312
// ggml_clamp
4313
4314
struct ggml_tensor * ggml_clamp(
4315
        struct ggml_context * ctx,
4316
        struct ggml_tensor  * a,
4317
        float                 min,
4318
0
        float                 max) {
4319
    // TODO: when implement backward, fix this:
4320
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4321
4322
0
    float params[] = { min, max };
4323
0
    ggml_set_op_params(result, params, sizeof(params));
4324
4325
0
    result->op     = GGML_OP_CLAMP;
4326
0
    result->src[0] = a;
4327
4328
0
    return result;
4329
0
}
4330
4331
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4332
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4333
0
}
4334
4335
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4336
// a: [OC,IC, KH, KW]
4337
// b: [N, IC, IH, IW]
4338
// result: [N, OH, OW, IC*KH*KW]
4339
struct ggml_tensor * ggml_im2col(
4340
        struct ggml_context * ctx,
4341
        struct ggml_tensor  * a,
4342
        struct ggml_tensor  * b,
4343
        int                   s0,
4344
        int                   s1,
4345
        int                   p0,
4346
        int                   p1,
4347
        int                   d0,
4348
        int                   d1,
4349
        bool                  is_2D,
4350
0
        enum ggml_type        dst_type) {
4351
0
    if (is_2D) {
4352
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4353
0
    } else {
4354
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4355
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4356
0
        GGML_ASSERT(b->ne[3] == 1);
4357
0
    }
4358
4359
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4360
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4361
4362
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4363
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4364
4365
0
    const int64_t ne[4] = {
4366
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4367
0
        OW,
4368
0
        is_2D ? OH : b->ne[2],
4369
0
        is_2D ?      b->ne[3] : 1,
4370
0
    };
4371
4372
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4373
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4374
0
    ggml_set_op_params(result, params, sizeof(params));
4375
4376
0
    result->op     = GGML_OP_IM2COL;
4377
0
    result->src[0] = a;
4378
0
    result->src[1] = b;
4379
4380
0
    return result;
4381
0
}
4382
4383
struct ggml_tensor * ggml_im2col_back(
4384
        struct ggml_context * ctx,
4385
        struct ggml_tensor  * a,
4386
        struct ggml_tensor  * b,
4387
        int64_t             * ne,
4388
        int                   s0,
4389
        int                   s1,
4390
        int                   p0,
4391
        int                   p1,
4392
        int                   d0,
4393
        int                   d1,
4394
0
        bool                  is_2D) {
4395
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4396
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4397
0
    ggml_set_op_params(result, params, sizeof(params));
4398
4399
0
    result->op     = GGML_OP_IM2COL_BACK;
4400
0
    result->src[0] = a;
4401
0
    result->src[1] = b;
4402
4403
0
    return result;
4404
0
}
4405
4406
// ggml_conv_1d
4407
4408
struct ggml_tensor * ggml_conv_1d(
4409
        struct ggml_context * ctx,
4410
        struct ggml_tensor  * a,
4411
        struct ggml_tensor  * b,
4412
        int                   s0,
4413
        int                   p0,
4414
0
        int                   d0) {
4415
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4416
4417
0
    struct ggml_tensor * result =
4418
0
        ggml_mul_mat(ctx,
4419
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4420
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4421
4422
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4423
4424
0
    return result;
4425
0
}
4426
4427
// ggml_conv_1d_ph
4428
4429
struct ggml_tensor* ggml_conv_1d_ph(
4430
        struct ggml_context * ctx,
4431
        struct ggml_tensor  * a,
4432
        struct ggml_tensor  * b,
4433
        int                   s,
4434
0
        int                   d) {
4435
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4436
0
}
4437
4438
// ggml_conv_1d_dw
4439
4440
struct ggml_tensor * ggml_conv_1d_dw(
4441
        struct ggml_context * ctx,
4442
        struct ggml_tensor  * a,
4443
        struct ggml_tensor  * b,
4444
        int                   s0,
4445
        int                   p0,
4446
0
        int                   d0) {
4447
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4448
4449
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4450
4451
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4452
4453
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4454
4455
0
    return result;
4456
0
}
4457
4458
// ggml_conv_1d_dw_ph
4459
4460
struct ggml_tensor * ggml_conv_1d_dw_ph(
4461
        struct ggml_context * ctx,
4462
        struct ggml_tensor  * a,
4463
        struct ggml_tensor  * b,
4464
        int                   s0,
4465
0
        int                   d0) {
4466
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4467
0
}
4468
4469
// ggml_conv_transpose_1d
4470
4471
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4472
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4473
0
}
4474
4475
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4476
        struct ggml_context * ctx,
4477
        struct ggml_tensor  * a,
4478
        struct ggml_tensor  * b,
4479
        int                   s0,
4480
        int                   p0,
4481
0
        int                   d0) {
4482
0
    GGML_ASSERT(ggml_is_matrix(b));
4483
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4484
0
    GGML_ASSERT(a->ne[3] == 1);
4485
4486
0
    GGML_ASSERT(p0 == 0);
4487
0
    GGML_ASSERT(d0 == 1);
4488
4489
0
    const int64_t ne[4] = {
4490
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4491
0
        a->ne[1], b->ne[2], 1,
4492
0
    };
4493
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4494
4495
0
    int32_t params[] = { s0, p0, d0 };
4496
0
    ggml_set_op_params(result, params, sizeof(params));
4497
4498
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4499
0
    result->src[0] = a;
4500
0
    result->src[1] = b;
4501
4502
0
    return result;
4503
0
}
4504
4505
// ggml_conv_2d
4506
4507
// a: [OC,IC, KH, KW]
4508
// b: [N, IC, IH, IW]
4509
// result: [N, OC, OH, OW]
4510
struct ggml_tensor * ggml_conv_2d(
4511
        struct ggml_context * ctx,
4512
        struct ggml_tensor  * a,
4513
        struct ggml_tensor  * b,
4514
        int                   s0,
4515
        int                   s1,
4516
        int                   p0,
4517
        int                   p1,
4518
        int                   d0,
4519
0
        int                   d1) {
4520
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4521
4522
0
    struct ggml_tensor * result =
4523
0
        ggml_mul_mat(ctx,
4524
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4525
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4526
4527
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4528
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4529
4530
4531
0
    return result;
4532
0
}
4533
4534
// a: [OC*IC, KD, KH, KW]
4535
// b: [N*IC, ID, IH, IW]
4536
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4537
struct ggml_tensor * ggml_im2col_3d(
4538
        struct ggml_context * ctx,
4539
        struct ggml_tensor  * a,
4540
        struct ggml_tensor  * b,
4541
        int64_t               IC,
4542
        int                   s0, // stride width
4543
        int                   s1, // stride height
4544
        int                   s2, // stride depth
4545
        int                   p0, // padding width
4546
        int                   p1, // padding height
4547
        int                   p2, // padding depth
4548
        int                   d0, // dilation width
4549
        int                   d1, // dilation height
4550
        int                   d2, // dilation depth
4551
0
        enum ggml_type        dst_type) {
4552
0
    const int64_t N = b->ne[3] / IC;
4553
0
    const int64_t ID = b->ne[2];
4554
0
    const int64_t IH = b->ne[1];
4555
0
    const int64_t IW = b->ne[0];
4556
4557
0
    const int64_t OC = a->ne[3] / IC;
4558
0
    UNUSED(OC);
4559
0
    const int64_t KD = a->ne[2];
4560
0
    const int64_t KH = a->ne[1];
4561
0
    const int64_t KW = a->ne[0];
4562
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4563
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4564
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4565
4566
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4567
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4568
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4569
4570
4571
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4572
4573
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4574
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4575
0
    ggml_set_op_params(result, params, sizeof(params));
4576
4577
0
    result->op     = GGML_OP_IM2COL_3D;
4578
0
    result->src[0] = a;
4579
0
    result->src[1] = b;
4580
4581
0
    return result;
4582
0
}
4583
4584
// a: [OC*IC, KD, KH, KW]
4585
// b: [N*IC, ID, IH, IW]
4586
// result: [N*OC, OD, OH, OW]
4587
struct ggml_tensor * ggml_conv_3d(
4588
        struct ggml_context * ctx,
4589
        struct ggml_tensor  * a,
4590
        struct ggml_tensor  * b,
4591
        int64_t               IC,
4592
        int                   s0, // stride width
4593
        int                   s1, // stride height
4594
        int                   s2, // stride depth
4595
        int                   p0, // padding width
4596
        int                   p1, // padding height
4597
        int                   p2, // padding depth
4598
        int                   d0, // dilation width
4599
        int                   d1, // dilation height
4600
        int                   d2  // dilation depth
4601
0
        ) {
4602
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4603
4604
0
    int64_t OC = a->ne[3] / IC;
4605
0
    int64_t N = b->ne[3] / IC;
4606
0
    struct ggml_tensor * result =
4607
0
        ggml_mul_mat(ctx,
4608
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4609
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4610
4611
0
    int64_t OD = im2col->ne[3] / N;
4612
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4613
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4614
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4615
4616
0
    return result;
4617
0
}
4618
4619
// ggml_conv_2d_sk_p0
4620
4621
struct ggml_tensor * ggml_conv_2d_sk_p0(
4622
        struct ggml_context * ctx,
4623
        struct ggml_tensor  * a,
4624
0
        struct ggml_tensor  * b) {
4625
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4626
0
}
4627
4628
// ggml_conv_2d_s1_ph
4629
4630
struct ggml_tensor * ggml_conv_2d_s1_ph(
4631
        struct ggml_context * ctx,
4632
        struct ggml_tensor  * a,
4633
0
        struct ggml_tensor  * b) {
4634
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4635
0
}
4636
4637
// ggml_conv_2d_dw
4638
4639
struct ggml_tensor * ggml_conv_2d_dw(
4640
        struct ggml_context * ctx,
4641
        struct ggml_tensor  * a,
4642
        struct ggml_tensor  * b,
4643
        int                   s0,
4644
        int                   s1,
4645
        int                   p0,
4646
        int                   p1,
4647
        int                   d0,
4648
0
        int                   d1) {
4649
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4650
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4651
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4652
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4653
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4654
4655
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4656
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4657
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4658
4659
0
    return result;
4660
0
}
4661
4662
// ggml_conv_2d_dw_direct
4663
4664
struct ggml_tensor * ggml_conv_2d_dw_direct(
4665
        struct ggml_context * ctx,
4666
        struct ggml_tensor  * a,
4667
        struct ggml_tensor  * b,
4668
        int                   stride0,
4669
        int                   stride1,
4670
        int                   pad0,
4671
        int                   pad1,
4672
        int                   dilation0,
4673
0
        int                   dilation1) {
4674
0
    GGML_ASSERT(a->ne[2] == 1);
4675
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4676
0
    int64_t ne[4];
4677
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4678
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4679
0
    ne[2] = b->ne[2];
4680
0
    ne[3] = b->ne[3];
4681
4682
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4683
4684
0
    if (ggml_is_contiguous_channels(b)) {
4685
        // Result will be permuted the same way as input (CWHN order)
4686
0
        const int64_t type_size = ggml_type_size(result->type);
4687
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4688
0
        result->nb[0] = result->ne[2] * type_size;
4689
0
        result->nb[1] = result->ne[0] * result->nb[0];
4690
0
        result->nb[2] = type_size;
4691
0
    }
4692
4693
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4694
0
    ggml_set_op_params(result, params, sizeof(params));
4695
4696
0
    result->op     = GGML_OP_CONV_2D_DW;
4697
0
    result->src[0] = a;
4698
0
    result->src[1] = b;
4699
0
    return result;
4700
0
}
4701
4702
// ggml_conv_2d_direct
4703
4704
struct ggml_tensor * ggml_conv_2d_direct(
4705
        struct ggml_context * ctx,
4706
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4707
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4708
        int                   s0,  // stride dimension 0
4709
        int                   s1,  // stride dimension 1
4710
        int                   p0,  // padding dimension 0
4711
        int                   p1,  // padding dimension 1
4712
        int                   d0,  // dilation dimension 0
4713
0
        int                   d1) {// dilation dimension 1
4714
4715
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4716
    //GGML_ASSERT(a->type == b->type);
4717
4718
0
    int64_t ne[4];
4719
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4720
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4721
0
    ne[2] = a->ne[3];
4722
0
    ne[3] = b->ne[3];
4723
4724
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4725
4726
0
    ggml_set_op_params_i32(result, 0, s0);
4727
0
    ggml_set_op_params_i32(result, 1, s1);
4728
0
    ggml_set_op_params_i32(result, 2, p0);
4729
0
    ggml_set_op_params_i32(result, 3, p1);
4730
0
    ggml_set_op_params_i32(result, 4, d0);
4731
0
    ggml_set_op_params_i32(result, 5, d1);
4732
4733
0
    result->op = GGML_OP_CONV_2D;
4734
0
    result->src[0] = a;
4735
0
    result->src[1] = b;
4736
4737
0
    return result;
4738
0
}
4739
4740
// ggml_conv_3d_direct
4741
4742
struct ggml_tensor * ggml_conv_3d_direct(
4743
        struct ggml_context * ctx,
4744
        struct ggml_tensor  * a,
4745
        struct ggml_tensor  * b,
4746
        int                   s0,
4747
        int                   s1,
4748
        int                   s2,
4749
        int                   p0,
4750
        int                   p1,
4751
        int                   p2,
4752
        int                   d0,
4753
        int                   d1,
4754
        int                   d2,
4755
        int                   c,
4756
        int                   n,
4757
0
        int                   oc) {
4758
4759
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4760
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4761
4762
0
    int64_t ne[4];
4763
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4764
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4765
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4766
0
    ne[3] = (int64_t) oc * n;
4767
4768
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4769
4770
0
    ggml_set_op_params_i32(result, 0,  s0);
4771
0
    ggml_set_op_params_i32(result, 1,  s1);
4772
0
    ggml_set_op_params_i32(result, 2,  s2);
4773
0
    ggml_set_op_params_i32(result, 3,  p0);
4774
0
    ggml_set_op_params_i32(result, 4,  p1);
4775
0
    ggml_set_op_params_i32(result, 5,  p2);
4776
0
    ggml_set_op_params_i32(result, 6,  d0);
4777
0
    ggml_set_op_params_i32(result, 7,  d1);
4778
0
    ggml_set_op_params_i32(result, 8,  d2);
4779
0
    ggml_set_op_params_i32(result, 9,  c);
4780
0
    ggml_set_op_params_i32(result, 10, n);
4781
0
    ggml_set_op_params_i32(result, 11, oc);
4782
4783
0
    result->op = GGML_OP_CONV_3D;
4784
0
    result->src[0] = a;
4785
0
    result->src[1] = b;
4786
4787
0
    return result;
4788
0
}
4789
4790
// ggml_conv_transpose_2d_p0
4791
4792
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4793
0
    return (ins - 1) * s - 2 * p + ks;
4794
0
}
4795
4796
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4797
        struct ggml_context * ctx,
4798
        struct ggml_tensor  * a,
4799
        struct ggml_tensor  * b,
4800
0
        int                   stride) {
4801
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4802
4803
0
    const int64_t ne[4] = {
4804
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4805
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4806
0
        a->ne[2], b->ne[3],
4807
0
    };
4808
4809
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4810
4811
0
    ggml_set_op_params_i32(result, 0, stride);
4812
4813
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4814
0
    result->src[0] = a;
4815
0
    result->src[1] = b;
4816
4817
0
    return result;
4818
0
}
4819
4820
// ggml_pool_*
4821
4822
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4823
0
    return (ins + 2 * p - ks) / s + 1;
4824
0
}
4825
4826
// ggml_pool_1d
4827
4828
struct ggml_tensor * ggml_pool_1d(
4829
        struct ggml_context * ctx,
4830
        struct ggml_tensor  * a,
4831
        enum ggml_op_pool     op,
4832
        int                   k0,
4833
        int                   s0,
4834
0
        int                   p0) {
4835
0
    const int64_t ne[4] = {
4836
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4837
0
        a->ne[1],
4838
0
        a->ne[2],
4839
0
        a->ne[3],
4840
0
    };
4841
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4842
4843
0
    int32_t params[] = { op, k0, s0, p0 };
4844
0
    ggml_set_op_params(result, params, sizeof(params));
4845
4846
0
    result->op     = GGML_OP_POOL_1D;
4847
0
    result->src[0] = a;
4848
4849
0
    return result;
4850
0
}
4851
4852
// ggml_pool_2d
4853
4854
struct ggml_tensor * ggml_pool_2d(
4855
        struct ggml_context * ctx,
4856
        struct ggml_tensor  * a,
4857
        enum ggml_op_pool     op,
4858
        int                   k0,
4859
        int                   k1,
4860
        int                   s0,
4861
        int                   s1,
4862
        float                 p0,
4863
0
        float                 p1) {
4864
0
    struct ggml_tensor * result;
4865
0
    const int64_t ne[4] = {
4866
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4867
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4868
0
        a->ne[2],
4869
0
        a->ne[3],
4870
0
    };
4871
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4872
4873
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4874
0
    ggml_set_op_params(result, params, sizeof(params));
4875
4876
0
    result->op     = GGML_OP_POOL_2D;
4877
0
    result->src[0] = a;
4878
4879
0
    return result;
4880
0
}
4881
4882
struct ggml_tensor * ggml_pool_2d_back(
4883
        struct ggml_context * ctx,
4884
        struct ggml_tensor  * a,
4885
        struct ggml_tensor  * af,
4886
        enum ggml_op_pool     op,
4887
        int                   k0,
4888
        int                   k1,
4889
        int                   s0,
4890
        int                   s1,
4891
        float                 p0,
4892
0
        float                 p1) {
4893
0
    struct ggml_tensor * result;
4894
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4895
4896
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4897
0
    ggml_set_op_params(result, params, sizeof(params));
4898
4899
0
    result->op     = GGML_OP_POOL_2D_BACK;
4900
0
    result->src[0] = a;
4901
0
    result->src[1] = af;
4902
4903
0
    return result;
4904
0
}
4905
4906
// ggml_upscale / ggml_interpolate
4907
4908
static struct ggml_tensor * ggml_interpolate_impl(
4909
        struct ggml_context * ctx,
4910
        struct ggml_tensor  * a,
4911
        int64_t               ne0,
4912
        int64_t               ne1,
4913
        int64_t               ne2,
4914
        int64_t               ne3,
4915
0
        uint32_t              mode) {
4916
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4917
    // TODO: implement antialias for modes other than bilinear
4918
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4919
4920
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4921
4922
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4923
4924
0
    result->op     = GGML_OP_UPSCALE;
4925
0
    result->src[0] = a;
4926
4927
0
    return result;
4928
0
}
4929
4930
struct ggml_tensor * ggml_upscale(
4931
        struct ggml_context * ctx,
4932
        struct ggml_tensor  * a,
4933
        int                   scale_factor,
4934
0
        enum ggml_scale_mode  mode) {
4935
0
    GGML_ASSERT(scale_factor > 1);
4936
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4937
0
}
4938
4939
struct ggml_tensor * ggml_upscale_ext(
4940
        struct ggml_context * ctx,
4941
        struct ggml_tensor  * a,
4942
        int                   ne0,
4943
        int                   ne1,
4944
        int                   ne2,
4945
        int                   ne3,
4946
0
        enum ggml_scale_mode  mode) {
4947
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4948
0
}
4949
4950
struct ggml_tensor * ggml_interpolate(
4951
        struct ggml_context * ctx,
4952
        struct ggml_tensor  * a,
4953
        int64_t               ne0,
4954
        int64_t               ne1,
4955
        int64_t               ne2,
4956
        int64_t               ne3,
4957
0
        uint32_t              mode) {
4958
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4959
0
}
4960
4961
// ggml_pad
4962
4963
struct ggml_tensor * ggml_pad(
4964
        struct ggml_context * ctx,
4965
        struct ggml_tensor  * a,
4966
        int                   p0,
4967
        int                   p1,
4968
        int                   p2,
4969
0
        int                   p3) {
4970
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4971
0
}
4972
4973
// ggml_pad_circular
4974
4975
struct ggml_tensor * ggml_pad_circular(
4976
        struct ggml_context * ctx,
4977
        struct ggml_tensor  * a,
4978
        int                   p0,
4979
        int                   p1,
4980
        int                   p2,
4981
0
        int                   p3) {
4982
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
4983
0
}
4984
4985
struct ggml_tensor * ggml_pad_ext(
4986
            struct ggml_context * ctx,
4987
            struct ggml_tensor  * a,
4988
            int                  lp0,
4989
            int                  rp0,
4990
            int                  lp1,
4991
            int                  rp1,
4992
            int                  lp2,
4993
            int                  rp2,
4994
            int                  lp3,
4995
            int                  rp3
4996
0
            ) {
4997
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
4998
0
            a->ne[0] + lp0 + rp0,
4999
0
            a->ne[1] + lp1 + rp1,
5000
0
            a->ne[2] + lp2 + rp2,
5001
0
            a->ne[3] + lp3 + rp3);
5002
5003
0
    ggml_set_op_params_i32(result, 0, lp0);
5004
0
    ggml_set_op_params_i32(result, 1, rp0);
5005
0
    ggml_set_op_params_i32(result, 2, lp1);
5006
0
    ggml_set_op_params_i32(result, 3, rp1);
5007
0
    ggml_set_op_params_i32(result, 4, lp2);
5008
0
    ggml_set_op_params_i32(result, 5, rp2);
5009
0
    ggml_set_op_params_i32(result, 6, lp3);
5010
0
    ggml_set_op_params_i32(result, 7, rp3);
5011
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5012
5013
5014
0
    result->op     = GGML_OP_PAD;
5015
0
    result->src[0] = a;
5016
5017
0
    return result;
5018
0
}
5019
5020
// ggml_pad_ext_circular
5021
5022
struct ggml_tensor * ggml_pad_ext_circular(
5023
        struct ggml_context * ctx,
5024
        struct ggml_tensor  * a,
5025
        int                  lp0,
5026
        int                  rp0,
5027
        int                  lp1,
5028
        int                  rp1,
5029
        int                  lp2,
5030
        int                  rp2,
5031
        int                  lp3,
5032
        int                  rp3
5033
0
        ) {
5034
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5035
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5036
0
    return result;
5037
0
}
5038
5039
// ggml_pad_reflect_1d
5040
5041
struct ggml_tensor * ggml_pad_reflect_1d(
5042
        struct ggml_context * ctx,
5043
        struct ggml_tensor  * a,
5044
        int                   p0,
5045
0
        int                   p1) {
5046
0
    GGML_ASSERT(p0 >= 0);
5047
0
    GGML_ASSERT(p1 >= 0);
5048
5049
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
5050
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5051
5052
0
    GGML_ASSERT(ggml_is_contiguous(a));
5053
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5054
5055
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5056
0
            a->ne[0] + p0 + p1,
5057
0
            a->ne[1],
5058
0
            a->ne[2],
5059
0
            a->ne[3]);
5060
5061
0
    int32_t params[] = { p0, p1 };
5062
0
    ggml_set_op_params(result, params, sizeof(params));
5063
5064
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5065
0
    result->src[0] = a;
5066
5067
0
    return result;
5068
0
}
5069
5070
// ggml_roll
5071
5072
struct ggml_tensor * ggml_roll(
5073
        struct ggml_context * ctx,
5074
        struct ggml_tensor  * a,
5075
        int                   shift0,
5076
        int                   shift1,
5077
        int                   shift2,
5078
0
        int                   shift3) {
5079
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5080
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5081
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5082
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5083
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5084
5085
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5086
5087
0
    ggml_set_op_params_i32(result, 0, shift0);
5088
0
    ggml_set_op_params_i32(result, 1, shift1);
5089
0
    ggml_set_op_params_i32(result, 2, shift2);
5090
0
    ggml_set_op_params_i32(result, 3, shift3);
5091
5092
0
    result->op     = GGML_OP_ROLL;
5093
0
    result->src[0] = a;
5094
5095
0
    return result;
5096
0
}
5097
5098
// ggml_timestep_embedding
5099
5100
struct ggml_tensor * ggml_timestep_embedding(
5101
        struct ggml_context * ctx,
5102
        struct ggml_tensor  * timesteps,
5103
        int                   dim,
5104
0
        int                   max_period) {
5105
5106
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5107
5108
0
    ggml_set_op_params_i32(result, 0, dim);
5109
0
    ggml_set_op_params_i32(result, 1, max_period);
5110
5111
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5112
0
    result->src[0] = timesteps;
5113
5114
0
    return result;
5115
0
}
5116
5117
// ggml_tri
5118
5119
struct ggml_tensor * ggml_tri(
5120
    struct ggml_context * ctx,
5121
    struct ggml_tensor  * a,
5122
0
    enum ggml_tri_type    type) {
5123
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5124
5125
0
    GGML_ASSERT(ggml_is_contiguous(a));
5126
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5127
5128
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5129
5130
0
    ggml_set_op_params_i32(result, 0, type);
5131
5132
0
    result->op = GGML_OP_TRI;
5133
0
    result->src[0] = a;
5134
5135
0
    return result;
5136
0
}
5137
5138
// ggml_fill
5139
5140
static struct ggml_tensor * ggml_fill_impl(
5141
    struct ggml_context * ctx,
5142
    struct ggml_tensor  * a,
5143
    float                 c,
5144
0
    bool                  inplace) {
5145
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5146
0
    GGML_ASSERT(ggml_is_contiguous(a));
5147
5148
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5149
5150
0
    ggml_set_op_params_f32(result, 0, c);
5151
5152
0
    result->op = GGML_OP_FILL;
5153
0
    result->src[0] = a;
5154
5155
0
    return result;
5156
0
}
5157
5158
struct ggml_tensor * ggml_fill(
5159
    struct ggml_context * ctx,
5160
    struct ggml_tensor  * a,
5161
0
    float                 c) {
5162
0
    return ggml_fill_impl(ctx, a, c, false);
5163
0
}
5164
5165
struct ggml_tensor * ggml_fill_inplace(
5166
    struct ggml_context * ctx,
5167
    struct ggml_tensor  * a,
5168
0
    float                 c) {
5169
0
    return ggml_fill_impl(ctx, a, c, true);
5170
0
}
5171
5172
// ggml_argsort
5173
5174
struct ggml_tensor * ggml_argsort(
5175
        struct ggml_context  * ctx,
5176
        struct ggml_tensor   * a,
5177
0
        enum ggml_sort_order   order) {
5178
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5179
5180
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5181
5182
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5183
5184
0
    result->op     = GGML_OP_ARGSORT;
5185
0
    result->src[0] = a;
5186
5187
0
    return result;
5188
0
}
5189
5190
// ggml_argsort_top_k
5191
5192
struct ggml_tensor * ggml_argsort_top_k(
5193
        struct ggml_context * ctx,
5194
        struct ggml_tensor  * a,
5195
0
        int                   k) {
5196
0
    GGML_ASSERT(a->ne[0] >= k);
5197
5198
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5199
5200
0
    result = ggml_view_4d(ctx, result,
5201
0
                k, result->ne[1], result->ne[2], result->ne[3],
5202
0
                   result->nb[1], result->nb[2], result->nb[3],
5203
0
                0);
5204
5205
0
    return result;
5206
0
}
5207
5208
// ggml_top_k
5209
5210
struct ggml_tensor * ggml_top_k(
5211
        struct ggml_context * ctx,
5212
        struct ggml_tensor  * a,
5213
0
        int                   k) {
5214
0
    GGML_ASSERT(a->ne[0] >= k);
5215
5216
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5217
5218
0
    result->op     = GGML_OP_TOP_K;
5219
0
    result->src[0] = a;
5220
5221
0
    return result;
5222
0
}
5223
5224
// ggml_arange
5225
5226
struct ggml_tensor * ggml_arange(
5227
        struct ggml_context * ctx,
5228
        float                 start,
5229
        float                 stop,
5230
0
        float                 step) {
5231
0
    GGML_ASSERT(stop > start);
5232
5233
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5234
5235
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5236
5237
0
    ggml_set_op_params_f32(result, 0, start);
5238
0
    ggml_set_op_params_f32(result, 1, stop);
5239
0
    ggml_set_op_params_f32(result, 2, step);
5240
5241
0
    result->op = GGML_OP_ARANGE;
5242
5243
0
    return result;
5244
0
}
5245
5246
// ggml_flash_attn_ext
5247
5248
struct ggml_tensor * ggml_flash_attn_ext(
5249
        struct ggml_context * ctx,
5250
        struct ggml_tensor  * q,
5251
        struct ggml_tensor  * k,
5252
        struct ggml_tensor  * v,
5253
        struct ggml_tensor  * mask,
5254
        float                 scale,
5255
        float                 max_bias,
5256
0
        float                 logit_softcap) {
5257
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5258
    // TODO: check if vT can be multiplied by (k*qT)
5259
5260
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5261
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5262
5263
0
    if (mask) {
5264
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5265
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5266
5267
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5268
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5269
0
    }
5270
5271
0
    if (max_bias > 0.0f) {
5272
0
        GGML_ASSERT(mask);
5273
0
    }
5274
5275
    // permute(0, 2, 1, 3)
5276
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5277
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5278
5279
0
    float params[] = { scale, max_bias, logit_softcap };
5280
0
    ggml_set_op_params(result, params, sizeof(params));
5281
5282
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5283
0
    result->src[0] = q;
5284
0
    result->src[1] = k;
5285
0
    result->src[2] = v;
5286
0
    result->src[3] = mask;
5287
5288
0
    return result;
5289
0
}
5290
5291
void ggml_flash_attn_ext_set_prec(
5292
        struct ggml_tensor * a,
5293
0
        enum ggml_prec       prec) {
5294
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5295
5296
0
    const int32_t prec_i32 = (int32_t) prec;
5297
5298
0
    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
5299
0
}
5300
5301
enum ggml_prec ggml_flash_attn_ext_get_prec(
5302
0
        const struct ggml_tensor * a) {
5303
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5304
5305
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5306
5307
0
    return (enum ggml_prec) prec_i32;
5308
0
}
5309
5310
void ggml_flash_attn_ext_add_sinks(
5311
        struct ggml_tensor * a,
5312
0
        struct ggml_tensor * sinks) {
5313
0
    if (!sinks) {
5314
0
        a->src[4] = NULL;
5315
0
        return;
5316
0
    }
5317
5318
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5319
0
    GGML_ASSERT(a->src[4] == NULL);
5320
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5321
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5322
5323
0
    a->src[4] = sinks;
5324
0
}
5325
5326
// ggml_flash_attn_back
5327
5328
struct ggml_tensor * ggml_flash_attn_back(
5329
        struct ggml_context * ctx,
5330
        struct ggml_tensor  * q,
5331
        struct ggml_tensor  * k,
5332
        struct ggml_tensor  * v,
5333
        struct ggml_tensor  * d,
5334
0
        bool                  masked) {
5335
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5336
5337
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5338
    // TODO: check if vT can be multiplied by (k*qT)
5339
5340
    // d shape [D,N,ne2,ne3]
5341
    // q shape [D,N,ne2,ne3]
5342
    // k shape [D,M,kvne2,ne3]
5343
    // v shape [M,D,kvne2,ne3]
5344
5345
0
    const int64_t     D = q->ne[0];
5346
0
    const int64_t     N = q->ne[1];
5347
0
    const int64_t     M = k->ne[1];
5348
0
    const int64_t   ne2 = q->ne[2];
5349
0
    const int64_t   ne3 = q->ne[3];
5350
0
    const int64_t kvne2 = k->ne[2];
5351
5352
0
    GGML_ASSERT(k->ne[0] == D);
5353
0
    GGML_ASSERT(v->ne[0] == M);
5354
0
    GGML_ASSERT(v->ne[1] == D);
5355
0
    GGML_ASSERT(d->ne[0] == D);
5356
0
    GGML_ASSERT(d->ne[1] == N);
5357
0
    GGML_ASSERT(k->ne[2] == kvne2);
5358
0
    GGML_ASSERT(k->ne[3] == ne3);
5359
0
    GGML_ASSERT(v->ne[2] == kvne2);
5360
0
    GGML_ASSERT(v->ne[3] == ne3);
5361
0
    GGML_ASSERT(d->ne[2] == ne2);
5362
0
    GGML_ASSERT(d->ne[3] == ne3);
5363
5364
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5365
5366
    // store gradients of q, k and v as continuous tensors concatenated in result.
5367
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5368
0
    const int64_t elem_q = ggml_nelements(q);
5369
0
    const int64_t elem_k = ggml_nelements(k);
5370
0
    const int64_t elem_v = ggml_nelements(v);
5371
5372
0
    enum ggml_type result_type = GGML_TYPE_F32;
5373
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5374
0
    const size_t tsize = ggml_type_size(result_type);
5375
5376
0
    const size_t offs_q = 0;
5377
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5378
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5379
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5380
5381
0
    const size_t nelements = (end + tsize - 1)/tsize;
5382
5383
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5384
5385
0
    int32_t masked_i = masked ? 1 : 0;
5386
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5387
5388
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5389
0
    result->src[0] = q;
5390
0
    result->src[1] = k;
5391
0
    result->src[2] = v;
5392
0
    result->src[3] = d;
5393
5394
0
    return result;
5395
0
}
5396
5397
// ggml_ssm_conv
5398
5399
struct ggml_tensor * ggml_ssm_conv(
5400
        struct ggml_context * ctx,
5401
        struct ggml_tensor  * sx,
5402
0
        struct ggml_tensor  * c) {
5403
0
    GGML_ASSERT(ggml_is_3d(sx));
5404
0
    GGML_ASSERT(ggml_is_matrix(c));
5405
5406
0
    const int64_t d_conv  = c->ne[0];
5407
0
    const int64_t d_inner = c->ne[1];
5408
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5409
0
    const int64_t n_s     = sx->ne[2];
5410
5411
    // TODO: maybe support other strides than 1?
5412
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5413
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5414
0
    GGML_ASSERT(n_t >= 0);
5415
5416
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5417
5418
0
    result->op     = GGML_OP_SSM_CONV;
5419
0
    result->src[0] = sx;
5420
0
    result->src[1] = c;
5421
5422
0
    return result;
5423
0
}
5424
5425
// ggml_ssm_scan
5426
5427
struct ggml_tensor * ggml_ssm_scan(
5428
        struct ggml_context * ctx,
5429
        struct ggml_tensor  * s,
5430
        struct ggml_tensor  * x,
5431
        struct ggml_tensor  * dt,
5432
        struct ggml_tensor  * A,
5433
        struct ggml_tensor  * B,
5434
        struct ggml_tensor  * C,
5435
0
        struct ggml_tensor  * ids) {
5436
0
    GGML_ASSERT(ggml_is_contiguous(s));
5437
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5438
0
    GGML_ASSERT(ggml_is_contiguous(A));
5439
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5440
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5441
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5442
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5443
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5444
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5445
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5446
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5447
5448
0
    {
5449
0
        const int64_t d_state      = s->ne[0];
5450
0
        const int64_t head_dim     = x->ne[0];
5451
0
        const int64_t n_head       = x->ne[1];
5452
0
        const int64_t n_seq_tokens = x->ne[2];
5453
0
        const int64_t n_seqs       = x->ne[3];
5454
5455
0
        GGML_ASSERT(dt->ne[0] == n_head);
5456
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5457
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5458
0
        GGML_ASSERT(ggml_is_3d(dt));
5459
0
        GGML_ASSERT(s->ne[1] == head_dim);
5460
0
        GGML_ASSERT(s->ne[2] == n_head);
5461
0
        GGML_ASSERT(B->ne[0] == d_state);
5462
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5463
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5464
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5465
0
        GGML_ASSERT(ggml_is_vector(ids));
5466
0
        GGML_ASSERT(A->ne[1] == n_head);
5467
0
        GGML_ASSERT(ggml_is_matrix(A));
5468
5469
0
        if (A->ne[0] != 1) {
5470
            // Mamba-1 has more granular decay factors
5471
0
            GGML_ASSERT(A->ne[0] == d_state);
5472
0
        }
5473
0
    }
5474
5475
    // concatenated y + ssm_states
5476
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5477
5478
0
    result->op   = GGML_OP_SSM_SCAN;
5479
0
    result->src[0] = s;
5480
0
    result->src[1] = x;
5481
0
    result->src[2] = dt;
5482
0
    result->src[3] = A;
5483
0
    result->src[4] = B;
5484
0
    result->src[5] = C;
5485
0
    result->src[6] = ids;
5486
5487
0
    return result;
5488
0
}
5489
5490
// ggml_win_part
5491
5492
struct ggml_tensor * ggml_win_part(
5493
        struct ggml_context * ctx,
5494
        struct ggml_tensor  * a,
5495
0
        int                   w) {
5496
0
    GGML_ASSERT(a->ne[3] == 1);
5497
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5498
5499
    // padding
5500
0
    const int px = (w - a->ne[1]%w)%w;
5501
0
    const int py = (w - a->ne[2]%w)%w;
5502
5503
0
    const int npx = (px + a->ne[1])/w;
5504
0
    const int npy = (py + a->ne[2])/w;
5505
0
    const int np  = npx*npy;
5506
5507
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5508
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5509
5510
0
    int32_t params[] = { npx, npy, w };
5511
0
    ggml_set_op_params(result, params, sizeof(params));
5512
5513
0
    result->op     = GGML_OP_WIN_PART;
5514
0
    result->src[0] = a;
5515
5516
0
    return result;
5517
0
}
5518
5519
// ggml_win_unpart
5520
5521
struct ggml_tensor * ggml_win_unpart(
5522
        struct ggml_context * ctx,
5523
        struct ggml_tensor  * a,
5524
        int                   w0,
5525
        int                   h0,
5526
0
        int                   w) {
5527
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5528
5529
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5530
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5531
5532
0
    int32_t params[] = { w };
5533
0
    ggml_set_op_params(result, params, sizeof(params));
5534
5535
0
    result->op     = GGML_OP_WIN_UNPART;
5536
0
    result->src[0] = a;
5537
5538
0
    return result;
5539
0
}
5540
5541
// ggml_get_rel_pos
5542
5543
struct ggml_tensor * ggml_get_rel_pos(
5544
        struct ggml_context * ctx,
5545
        struct ggml_tensor  * a,
5546
        int                   qh,
5547
0
        int                   kh) {
5548
0
    GGML_ASSERT(qh == kh);
5549
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5550
5551
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5552
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5553
5554
0
    result->op     = GGML_OP_GET_REL_POS;
5555
0
    result->src[0] = a;
5556
5557
0
    return result;
5558
0
}
5559
5560
// ggml_add_rel_pos
5561
5562
static struct ggml_tensor * ggml_add_rel_pos_impl(
5563
        struct ggml_context * ctx,
5564
        struct ggml_tensor  * a,
5565
        struct ggml_tensor  * pw,
5566
        struct ggml_tensor  * ph,
5567
0
        bool                  inplace) {
5568
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5569
0
    GGML_ASSERT(ggml_is_contiguous(a));
5570
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5571
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5572
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5573
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5574
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5575
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5576
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5577
5578
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5579
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5580
5581
0
    result->op     = GGML_OP_ADD_REL_POS;
5582
0
    result->src[0] = a;
5583
0
    result->src[1] = pw;
5584
0
    result->src[2] = ph;
5585
5586
0
    return result;
5587
0
}
5588
5589
struct ggml_tensor * ggml_add_rel_pos(
5590
        struct ggml_context * ctx,
5591
        struct ggml_tensor  * a,
5592
        struct ggml_tensor  * pw,
5593
0
        struct ggml_tensor  * ph) {
5594
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5595
0
}
5596
5597
struct ggml_tensor * ggml_add_rel_pos_inplace(
5598
        struct ggml_context * ctx,
5599
        struct ggml_tensor  * a,
5600
        struct ggml_tensor  * pw,
5601
0
        struct ggml_tensor  * ph) {
5602
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5603
0
}
5604
5605
// ggml_rwkv_wkv6
5606
5607
struct ggml_tensor * ggml_rwkv_wkv6(
5608
        struct ggml_context * ctx,
5609
        struct ggml_tensor  * k,
5610
        struct ggml_tensor  * v,
5611
        struct ggml_tensor  * r,
5612
        struct ggml_tensor  * tf,
5613
        struct ggml_tensor  * td,
5614
0
        struct ggml_tensor  * state) {
5615
0
    GGML_ASSERT(ggml_is_contiguous(k));
5616
0
    GGML_ASSERT(ggml_is_contiguous(v));
5617
0
    GGML_ASSERT(ggml_is_contiguous(r));
5618
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5619
0
    GGML_ASSERT(ggml_is_contiguous(td));
5620
0
    GGML_ASSERT(ggml_is_contiguous(state));
5621
5622
0
    const int64_t S = k->ne[0];
5623
0
    const int64_t H = k->ne[1];
5624
0
    const int64_t n_tokens = k->ne[2];
5625
0
    const int64_t n_seqs = state->ne[1];
5626
0
    {
5627
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5628
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5629
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5630
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5631
0
    }
5632
5633
    // concat output and new_state
5634
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5635
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5636
5637
0
    result->op     = GGML_OP_RWKV_WKV6;
5638
0
    result->src[0] = k;
5639
0
    result->src[1] = v;
5640
0
    result->src[2] = r;
5641
0
    result->src[3] = tf;
5642
0
    result->src[4] = td;
5643
0
    result->src[5] = state;
5644
5645
0
    return result;
5646
0
}
5647
5648
// ggml_gated_linear_attn
5649
5650
struct ggml_tensor * ggml_gated_linear_attn(
5651
        struct ggml_context * ctx,
5652
        struct ggml_tensor  * k,
5653
        struct ggml_tensor  * v,
5654
        struct ggml_tensor  * q,
5655
        struct ggml_tensor  * g,
5656
        struct ggml_tensor  * state,
5657
0
        float scale) {
5658
0
    GGML_ASSERT(ggml_is_contiguous(k));
5659
0
    GGML_ASSERT(ggml_is_contiguous(v));
5660
0
    GGML_ASSERT(ggml_is_contiguous(q));
5661
0
    GGML_ASSERT(ggml_is_contiguous(g));
5662
0
    GGML_ASSERT(ggml_is_contiguous(state));
5663
5664
0
    const int64_t S = k->ne[0];
5665
0
    const int64_t H = k->ne[1];
5666
0
    const int64_t n_tokens = k->ne[2];
5667
0
    const int64_t n_seqs = state->ne[1];
5668
0
    {
5669
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5670
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5671
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5672
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5673
0
    }
5674
5675
    // concat output and new_state
5676
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5677
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5678
5679
0
    ggml_set_op_params_f32(result, 0, scale);
5680
5681
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5682
0
    result->src[0] = k;
5683
0
    result->src[1] = v;
5684
0
    result->src[2] = q;
5685
0
    result->src[3] = g;
5686
0
    result->src[4] = state;
5687
5688
0
    return result;
5689
0
}
5690
5691
// ggml_rwkv_wkv7
5692
5693
struct ggml_tensor * ggml_rwkv_wkv7(
5694
        struct ggml_context * ctx,
5695
        struct ggml_tensor  * r,
5696
        struct ggml_tensor  * w,
5697
        struct ggml_tensor  * k,
5698
        struct ggml_tensor  * v,
5699
        struct ggml_tensor  * a,
5700
        struct ggml_tensor  * b,
5701
0
        struct ggml_tensor  * state) {
5702
0
    GGML_ASSERT(ggml_is_contiguous(r));
5703
0
    GGML_ASSERT(ggml_is_contiguous(w));
5704
0
    GGML_ASSERT(ggml_is_contiguous(k));
5705
0
    GGML_ASSERT(ggml_is_contiguous(v));
5706
0
    GGML_ASSERT(ggml_is_contiguous(a));
5707
0
    GGML_ASSERT(ggml_is_contiguous(b));
5708
0
    GGML_ASSERT(ggml_is_contiguous(state));
5709
5710
0
    const int64_t S = k->ne[0];
5711
0
    const int64_t H = k->ne[1];
5712
0
    const int64_t n_tokens = k->ne[2];
5713
0
    const int64_t n_seqs = state->ne[1];
5714
0
    {
5715
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5716
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5717
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5718
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5719
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5720
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5721
0
    }
5722
5723
    // concat output and new_state
5724
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5725
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5726
5727
0
    result->op     = GGML_OP_RWKV_WKV7;
5728
0
    result->src[0] = r;
5729
0
    result->src[1] = w;
5730
0
    result->src[2] = k;
5731
0
    result->src[3] = v;
5732
0
    result->src[4] = a;
5733
0
    result->src[5] = b;
5734
0
    result->src[6] = state;
5735
5736
0
    return result;
5737
0
}
5738
5739
// ggml_unary
5740
5741
static struct ggml_tensor * ggml_unary_impl(
5742
        struct ggml_context * ctx,
5743
        struct ggml_tensor  * a,
5744
        enum ggml_unary_op    op,
5745
0
        bool                  inplace) {
5746
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
5747
5748
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5749
5750
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5751
5752
0
    result->op     = GGML_OP_UNARY;
5753
0
    result->src[0] = a;
5754
5755
0
    return result;
5756
0
}
5757
5758
struct ggml_tensor * ggml_unary(
5759
        struct ggml_context * ctx,
5760
        struct ggml_tensor  * a,
5761
0
        enum ggml_unary_op    op) {
5762
0
    return ggml_unary_impl(ctx, a, op, false);
5763
0
}
5764
5765
struct ggml_tensor * ggml_unary_inplace(
5766
        struct ggml_context * ctx,
5767
        struct ggml_tensor  * a,
5768
0
        enum ggml_unary_op    op) {
5769
0
    return ggml_unary_impl(ctx, a, op, true);
5770
0
}
5771
5772
// ggml_map_custom1
5773
5774
static struct ggml_tensor * ggml_map_custom1_impl(
5775
        struct ggml_context      * ctx,
5776
        struct ggml_tensor       * a,
5777
        const  ggml_custom1_op_t   fun,
5778
        int                        n_tasks,
5779
        void                     * userdata,
5780
0
        bool                       inplace) {
5781
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5782
5783
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5784
5785
0
    struct ggml_map_custom1_op_params params = {
5786
0
        /*.fun      =*/ fun,
5787
0
        /*.n_tasks  =*/ n_tasks,
5788
0
        /*.userdata =*/ userdata
5789
0
    };
5790
0
    ggml_set_op_params(result, &params, sizeof(params));
5791
5792
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5793
0
    result->src[0] = a;
5794
5795
0
    return result;
5796
0
}
5797
5798
struct ggml_tensor * ggml_map_custom1(
5799
        struct ggml_context      * ctx,
5800
        struct ggml_tensor       * a,
5801
        const  ggml_custom1_op_t   fun,
5802
        int                        n_tasks,
5803
0
        void                     * userdata) {
5804
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5805
0
}
5806
5807
struct ggml_tensor * ggml_map_custom1_inplace(
5808
        struct ggml_context      * ctx,
5809
        struct ggml_tensor       * a,
5810
        const  ggml_custom1_op_t   fun,
5811
        int                        n_tasks,
5812
0
        void                     * userdata) {
5813
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5814
0
}
5815
5816
// ggml_map_custom2
5817
5818
static struct ggml_tensor * ggml_map_custom2_impl(
5819
        struct ggml_context      * ctx,
5820
        struct ggml_tensor       * a,
5821
        struct ggml_tensor       * b,
5822
        const  ggml_custom2_op_t   fun,
5823
        int                        n_tasks,
5824
        void                     * userdata,
5825
0
        bool                       inplace) {
5826
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5827
5828
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5829
5830
0
    struct ggml_map_custom2_op_params params = {
5831
0
        /*.fun      =*/ fun,
5832
0
        /*.n_tasks  =*/ n_tasks,
5833
0
        /*.userdata =*/ userdata
5834
0
    };
5835
0
    ggml_set_op_params(result, &params, sizeof(params));
5836
5837
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5838
0
    result->src[0] = a;
5839
0
    result->src[1] = b;
5840
5841
0
    return result;
5842
0
}
5843
5844
struct ggml_tensor * ggml_map_custom2(
5845
        struct ggml_context      * ctx,
5846
        struct ggml_tensor       * a,
5847
        struct ggml_tensor       * b,
5848
        const  ggml_custom2_op_t   fun,
5849
        int                        n_tasks,
5850
0
        void                     * userdata) {
5851
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5852
0
}
5853
5854
struct ggml_tensor * ggml_map_custom2_inplace(
5855
        struct ggml_context      * ctx,
5856
        struct ggml_tensor       * a,
5857
        struct ggml_tensor       * b,
5858
        const  ggml_custom2_op_t   fun,
5859
        int                        n_tasks,
5860
0
        void                     * userdata) {
5861
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5862
0
}
5863
5864
// ggml_map_custom3
5865
5866
static struct ggml_tensor * ggml_map_custom3_impl(
5867
        struct ggml_context      * ctx,
5868
        struct ggml_tensor       * a,
5869
        struct ggml_tensor       * b,
5870
        struct ggml_tensor       * c,
5871
        const  ggml_custom3_op_t   fun,
5872
        int                        n_tasks,
5873
        void                     * userdata,
5874
0
        bool                       inplace) {
5875
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5876
5877
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5878
5879
0
    struct ggml_map_custom3_op_params params = {
5880
0
        /*.fun      =*/ fun,
5881
0
        /*.n_tasks  =*/ n_tasks,
5882
0
        /*.userdata =*/ userdata
5883
0
    };
5884
0
    ggml_set_op_params(result, &params, sizeof(params));
5885
5886
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5887
0
    result->src[0] = a;
5888
0
    result->src[1] = b;
5889
0
    result->src[2] = c;
5890
5891
0
    return result;
5892
0
}
5893
5894
struct ggml_tensor * ggml_map_custom3(
5895
        struct ggml_context      * ctx,
5896
        struct ggml_tensor       * a,
5897
        struct ggml_tensor       * b,
5898
        struct ggml_tensor       * c,
5899
        const  ggml_custom3_op_t   fun,
5900
        int                        n_tasks,
5901
0
        void                     * userdata) {
5902
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5903
0
}
5904
5905
struct ggml_tensor * ggml_map_custom3_inplace(
5906
        struct ggml_context      * ctx,
5907
        struct ggml_tensor       * a,
5908
        struct ggml_tensor       * b,
5909
        struct ggml_tensor       * c,
5910
        const  ggml_custom3_op_t   fun,
5911
        int                        n_tasks,
5912
0
        void                     * userdata) {
5913
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5914
0
}
5915
5916
struct ggml_tensor * ggml_custom_4d(
5917
        struct ggml_context * ctx,
5918
        enum ggml_type        type,
5919
        int64_t               ne0,
5920
        int64_t               ne1,
5921
        int64_t               ne2,
5922
        int64_t               ne3,
5923
        struct ggml_tensor ** args,
5924
        int                   n_args,
5925
        ggml_custom_op_t      fun,
5926
        int                   n_tasks,
5927
0
        void                * userdata) {
5928
5929
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5930
5931
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5932
5933
0
    struct ggml_custom_op_params params = {
5934
0
        /*.fun      =*/ fun,
5935
0
        /*.n_tasks  =*/ n_tasks,
5936
0
        /*.userdata =*/ userdata
5937
0
    };
5938
0
    ggml_set_op_params(result, &params, sizeof(params));
5939
5940
0
    result->op = GGML_OP_CUSTOM;
5941
0
    for (int i = 0; i < n_args; i++) {
5942
0
        result->src[i] = args[i];
5943
0
    }
5944
5945
0
    return result;
5946
0
}
5947
5948
struct ggml_tensor * ggml_custom_inplace(
5949
        struct ggml_context * ctx,
5950
        struct ggml_tensor  * a,
5951
        struct ggml_tensor ** args,
5952
        int                   n_args,
5953
        ggml_custom_op_t      fun,
5954
        int                   n_tasks,
5955
0
        void                * userdata) {
5956
5957
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5958
5959
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5960
5961
0
    struct ggml_custom_op_params params = {
5962
0
        /*.fun      =*/ fun,
5963
0
        /*.n_tasks  =*/ n_tasks,
5964
0
        /*.userdata =*/ userdata
5965
0
    };
5966
0
    ggml_set_op_params(result, &params, sizeof(params));
5967
5968
0
    result->op = GGML_OP_CUSTOM;
5969
0
    result->src[0] = a;
5970
0
    for (int i = 0; i < n_args; i++) {
5971
0
        result->src[i + 1] = args[i];
5972
0
    }
5973
5974
0
    return result;
5975
0
}
5976
// ggml_cross_entropy_loss
5977
5978
struct ggml_tensor * ggml_cross_entropy_loss(
5979
        struct ggml_context * ctx,
5980
        struct ggml_tensor  * a,
5981
0
        struct ggml_tensor  * b) {
5982
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
5983
5984
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
5985
5986
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
5987
0
    result->src[0] = a;
5988
0
    result->src[1] = b;
5989
5990
0
    return result;
5991
0
}
5992
5993
// ggml_cross_entropy_loss_back
5994
5995
struct ggml_tensor * ggml_cross_entropy_loss_back(
5996
        struct ggml_context * ctx,
5997
        struct ggml_tensor  * a,
5998
        struct ggml_tensor  * b,
5999
0
        struct ggml_tensor  * c) {
6000
0
    GGML_ASSERT(ggml_is_scalar(a));
6001
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6002
6003
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6004
6005
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6006
0
    result->src[0] = a;
6007
0
    result->src[1] = b;
6008
0
    result->src[2] = c;
6009
6010
0
    return result;
6011
0
}
6012
6013
// opt_step_adamw
6014
6015
struct ggml_tensor * ggml_opt_step_adamw(
6016
        struct ggml_context * ctx,
6017
        struct ggml_tensor  * a,
6018
        struct ggml_tensor  * grad,
6019
        struct ggml_tensor  * m,
6020
        struct ggml_tensor  * v,
6021
0
        struct ggml_tensor  * adamw_params) {
6022
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6023
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6024
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6025
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6026
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6027
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6028
6029
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6030
6031
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6032
0
    result->src[0] = a;
6033
0
    result->src[1] = grad;
6034
0
    result->src[2] = m;
6035
0
    result->src[3] = v;
6036
0
    result->src[4] = adamw_params;
6037
6038
0
    return result;
6039
0
}
6040
6041
// opt_step_sgd
6042
6043
struct ggml_tensor * ggml_opt_step_sgd(
6044
        struct ggml_context * ctx,
6045
        struct ggml_tensor  * a,
6046
        struct ggml_tensor  * grad,
6047
0
        struct ggml_tensor  * params) {
6048
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6049
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6050
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6051
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6052
6053
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6054
6055
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6056
0
    result->src[0] = a;
6057
0
    result->src[1] = grad;
6058
0
    result->src[2] = params;
6059
6060
0
    return result;
6061
0
}
6062
6063
// solve_tri
6064
6065
struct ggml_tensor * ggml_solve_tri(
6066
        struct ggml_context * ctx,
6067
        struct ggml_tensor  * a,
6068
        struct ggml_tensor  * b,
6069
        bool                  left,
6070
        bool                  lower,
6071
0
        bool                  uni) {
6072
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6073
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6074
6075
    // A must be square and lower diagonal
6076
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6077
    // B must have same outer dimension as A
6078
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6079
6080
    // batch dimensions must be equal
6081
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6082
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6083
6084
0
    GGML_ASSERT(ggml_is_contiguous(a));
6085
0
    GGML_ASSERT(ggml_is_contiguous(b));
6086
6087
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6088
6089
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6090
6091
0
    result->op     = GGML_OP_SOLVE_TRI;
6092
0
    result->src[0] = a;
6093
0
    result->src[1] = b;
6094
6095
0
    return result;
6096
0
}
6097
6098
////////////////////////////////////////////////////////////////////////////////
6099
6100
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6101
0
    size = ggml_hash_size(size);
6102
0
    struct ggml_hash_set result;
6103
0
    result.size = size;
6104
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6105
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6106
0
    return result;
6107
0
}
6108
6109
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6110
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6111
0
}
6112
6113
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6114
0
    GGML_FREE(hash_set->used);
6115
0
    GGML_FREE(hash_set->keys);
6116
0
}
6117
6118
0
size_t ggml_hash_size(size_t min_sz) {
6119
    // next primes after powers of two
6120
0
    static const size_t primes[] = {
6121
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6122
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6123
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6124
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6125
0
        536870923, 1073741827, 2147483659
6126
0
    };
6127
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6128
6129
    // find the smallest prime that is larger or equal than min_sz
6130
0
    size_t l = 0;
6131
0
    size_t r = n_primes;
6132
0
    while (l < r) {
6133
0
        size_t m = (l + r)/2;
6134
0
        if (primes[m] < min_sz) {
6135
0
            l = m + 1;
6136
0
        } else {
6137
0
            r = m;
6138
0
        }
6139
0
    }
6140
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6141
0
    return sz;
6142
0
}
6143
6144
struct hash_map {
6145
    struct ggml_hash_set set;
6146
    struct ggml_tensor ** vals;
6147
};
6148
6149
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6150
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6151
0
    result->set = ggml_hash_set_new(size);
6152
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6153
0
    return result;
6154
0
}
6155
6156
0
static void ggml_hash_map_free(struct hash_map * map) {
6157
0
    ggml_hash_set_free(&map->set);
6158
0
    GGML_FREE(map->vals);
6159
0
    GGML_FREE(map);
6160
0
}
6161
6162
// utility functions to change gradients
6163
// isrc is the index of tensor in cgraph->visited_has_set.keys
6164
// the corresponding gradient (accumulators) are also at position isrc
6165
// if tensor has a gradient accumulator, modify that accumulator in-place
6166
// else if there is no gradient for tensor, set the corresponding value
6167
// else, just add/subtract/etc. the gradients
6168
6169
static void ggml_add_or_set(
6170
        struct ggml_context * ctx,
6171
        struct ggml_cgraph  * cgraph,
6172
        size_t                isrc,
6173
0
        struct ggml_tensor  * tensor) {
6174
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6175
0
    GGML_ASSERT(src);
6176
0
    if (cgraph->grads[isrc]) {
6177
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6178
0
    } else {
6179
0
        cgraph->grads[isrc] = tensor;
6180
0
    }
6181
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6182
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6183
0
}
6184
6185
static void ggml_acc_or_set(
6186
        struct ggml_context * ctx,
6187
        struct ggml_cgraph  * cgraph,
6188
        size_t                isrc,
6189
        struct ggml_tensor  * tensor,
6190
        const  size_t         nb1,
6191
        const  size_t         nb2,
6192
        const  size_t         nb3,
6193
0
        const  size_t         offset) {
6194
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6195
0
    GGML_ASSERT(src);
6196
0
    if (cgraph->grads[isrc]) {
6197
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6198
0
    } else {
6199
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6200
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6201
0
    }
6202
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6203
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6204
0
}
6205
6206
static void ggml_add1_or_set(
6207
        struct ggml_context * ctx,
6208
        struct ggml_cgraph  * cgraph,
6209
        size_t                isrc,
6210
0
        struct ggml_tensor  * tensor) {
6211
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6212
0
    GGML_ASSERT(src);
6213
0
    if (cgraph->grads[isrc]) {
6214
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6215
0
    } else {
6216
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6217
0
    }
6218
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6219
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6220
0
}
6221
6222
static void ggml_sub_or_set(
6223
        struct ggml_context * ctx,
6224
        struct ggml_cgraph  * cgraph,
6225
        size_t                isrc,
6226
0
        struct ggml_tensor  * tensor) {
6227
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6228
0
    GGML_ASSERT(src);
6229
0
    if (cgraph->grads[isrc]) {
6230
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6231
0
    } else {
6232
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6233
0
    }
6234
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6235
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6236
0
}
6237
6238
static void ggml_compute_backward(
6239
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6240
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6241
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6242
6243
0
    if (!grad) {
6244
0
        return;
6245
0
    }
6246
6247
0
    struct ggml_tensor * src0 = tensor->src[0];
6248
0
    struct ggml_tensor * src1 = tensor->src[1];
6249
0
    struct ggml_tensor * src2 = tensor->src[2];
6250
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6251
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6252
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6253
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6254
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6255
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6256
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6257
6258
0
    switch (tensor->op) {
6259
0
        case GGML_OP_DUP: {
6260
0
            if (src0_needs_grads) {
6261
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6262
0
            }
6263
0
        } break;
6264
0
        case GGML_OP_ADD: {
6265
0
            if (src0_needs_grads) {
6266
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6267
0
            }
6268
0
            if (src1_needs_grads) {
6269
0
                struct ggml_tensor * tmp = grad;
6270
0
                if (!ggml_are_same_shape(src0, src1)) {
6271
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6272
0
                }
6273
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6274
0
            }
6275
0
        } break;
6276
0
        case GGML_OP_ADD1: {
6277
0
            if (src0_needs_grads) {
6278
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6279
0
            }
6280
0
            if (src1_needs_grads) {
6281
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6282
0
            }
6283
0
        } break;
6284
0
        case GGML_OP_ACC: {
6285
0
            if (src0_needs_grads) {
6286
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6287
0
            }
6288
0
            if (src1_needs_grads) {
6289
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6290
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6291
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6292
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6293
6294
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6295
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6296
0
                    nb1, nb2, nb3, offset);
6297
6298
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6299
0
            }
6300
0
        } break;
6301
0
        case GGML_OP_SUB: {
6302
0
            if (src0_needs_grads) {
6303
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6304
0
            }
6305
0
            if (src1_needs_grads) {
6306
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6307
0
            }
6308
0
        } break;
6309
0
        case GGML_OP_MUL: {
6310
0
            if (src0_needs_grads) {
6311
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6312
0
            }
6313
0
            if (src1_needs_grads) {
6314
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6315
0
                if (!ggml_are_same_shape(src0, src1)) {
6316
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6317
0
                }
6318
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6319
0
            }
6320
0
        } break;
6321
0
        case GGML_OP_DIV: {
6322
0
            if (src0_needs_grads) {
6323
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6324
0
            }
6325
0
            if (src1_needs_grads) {
6326
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6327
0
            }
6328
0
        } break;
6329
0
        case GGML_OP_SQR: {
6330
0
            if (src0_needs_grads) {
6331
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6332
0
            }
6333
0
        } break;
6334
0
        case GGML_OP_SQRT: {
6335
0
            if (src0_needs_grads) {
6336
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6337
0
            }
6338
0
        } break;
6339
0
        case GGML_OP_LOG: {
6340
0
            if (src0_needs_grads) {
6341
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6342
0
            }
6343
0
        } break;
6344
0
        case GGML_OP_SIN: {
6345
0
            if (src0_needs_grads) {
6346
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6347
0
            }
6348
0
        } break;
6349
0
        case GGML_OP_COS: {
6350
0
            if (src0_needs_grads) {
6351
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6352
0
            }
6353
0
        } break;
6354
0
        case GGML_OP_SUM: {
6355
0
            if (src0_needs_grads) {
6356
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6357
0
            }
6358
0
        } break;
6359
0
        case GGML_OP_SUM_ROWS: {
6360
0
            if (src0_needs_grads) {
6361
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6362
0
            }
6363
0
        } break;
6364
0
        case GGML_OP_MEAN: {
6365
0
            if (src0_needs_grads) {
6366
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6367
0
            }
6368
0
        } break;
6369
0
        case GGML_OP_REPEAT: {
6370
0
            if (src0_needs_grads) {
6371
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6372
0
            }
6373
0
        } break;
6374
0
        case GGML_OP_REPEAT_BACK: {
6375
0
            if (src0_needs_grads) {
6376
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6377
0
            }
6378
0
        } break;
6379
0
        case GGML_OP_RMS_NORM: {
6380
0
            if (src0_needs_grads) {
6381
0
                float eps;
6382
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6383
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6384
0
            }
6385
0
        } break;
6386
0
        case GGML_OP_MUL_MAT: {
6387
            // https://cs231n.github.io/optimization-2/#staged
6388
            // # forward pass
6389
            // s0 = np.random.randn(5, 10)
6390
            // s1 = np.random.randn(10, 3)
6391
            // t = s0.dot(s1)
6392
6393
            // # now suppose we had the gradient on t from above in the circuit
6394
            // dt = np.random.randn(*t.shape) # same shape as t
6395
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6396
            // ds1 = t.T.dot(dt)
6397
6398
            // tensor.shape [m,p,qq,rr]
6399
            // src0.shape   [n,m,q1,r1]
6400
            // src1.shape   [n,p,qq,rr]
6401
6402
0
            if (src0_needs_grads) {
6403
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6404
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6405
0
                struct ggml_tensor * tmp =
6406
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6407
0
                        src1,          // [n,p,qq,rr]
6408
0
                        grad);         // [m,p,qq,rr]
6409
0
                if (!ggml_are_same_shape(tmp, src0)) {
6410
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6411
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6412
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6413
6414
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6415
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6416
0
                    const size_t nb3 = tmp->nb[2];
6417
6418
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6419
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6420
0
                }
6421
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6422
0
            }
6423
0
            if (src1_needs_grads) {
6424
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6425
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6426
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6427
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6428
                        //     grad),                          // [m,p,qq,rr]
6429
6430
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6431
                        // avoid transpose of src0, rather transpose smaller tensor->grad
6432
                        // and then use ggml_out_prod
6433
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6434
0
                            src0,               // [n,m,q1,r1]
6435
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6436
0
                                grad)));        // [m,p,qq,rr]
6437
0
            }
6438
0
        } break;
6439
0
        case GGML_OP_SCALE: {
6440
0
            if (src0_needs_grads) {
6441
0
                float s;
6442
0
                memcpy(&s, tensor->op_params, sizeof(float));
6443
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6444
0
            }
6445
0
        } break;
6446
0
        case GGML_OP_SET: {
6447
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6448
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6449
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6450
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6451
6452
0
            struct ggml_tensor * tensor_grad_view = NULL;
6453
6454
0
            if (src0_needs_grads || src1_needs_grads) {
6455
0
                GGML_ASSERT(src0->type == tensor->type);
6456
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6457
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6458
6459
0
                tensor_grad_view = ggml_view_4d(ctx,
6460
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6461
0
                    nb1, nb2, nb3, offset);
6462
0
            }
6463
6464
0
            if (src0_needs_grads) {
6465
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6466
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6467
0
            }
6468
6469
0
            if (src1_needs_grads) {
6470
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6471
0
            }
6472
0
        } break;
6473
0
        case GGML_OP_CPY: {
6474
            // cpy overwrites value of src1 by src0 and returns view(src1)
6475
            // the overwriting is mathematically equivalent to:
6476
            // tensor = src0 * 1 + src1 * 0
6477
0
            if (src0_needs_grads) {
6478
                // dsrc0 = dtensor * 1
6479
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6480
0
            }
6481
0
            if (src1_needs_grads) {
6482
                // dsrc1 = dtensor * 0 -> noop
6483
0
            }
6484
0
        } break;
6485
0
        case GGML_OP_CONT: {
6486
            // same as cpy
6487
0
            if (src0_needs_grads) {
6488
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6489
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6490
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6491
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6492
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6493
0
            }
6494
0
        } break;
6495
0
        case GGML_OP_RESHAPE: {
6496
0
            if (src0_needs_grads) {
6497
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6498
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6499
0
            }
6500
0
        } break;
6501
0
        case GGML_OP_VIEW: {
6502
0
            if (src0_needs_grads) {
6503
0
                size_t offset;
6504
6505
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6506
6507
0
                size_t nb1 = tensor->nb[1];
6508
0
                size_t nb2 = tensor->nb[2];
6509
0
                size_t nb3 = tensor->nb[3];
6510
6511
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6512
                    // gradient is typically F32, but src0 could be other type
6513
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6514
0
                    size_t n0 = ggml_element_size(src0);
6515
0
                    GGML_ASSERT(offset % n0 == 0);
6516
0
                    GGML_ASSERT(nb1 % n0 == 0);
6517
0
                    GGML_ASSERT(nb2 % n0 == 0);
6518
0
                    GGML_ASSERT(nb3 % n0 == 0);
6519
0
                    offset = (offset / n0) * ng;
6520
0
                    nb1 = (nb1 / n0) * ng;
6521
0
                    nb2 = (nb2 / n0) * ng;
6522
0
                    nb3 = (nb3 / n0) * ng;
6523
0
                }
6524
6525
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6526
0
            }
6527
0
        } break;
6528
0
        case GGML_OP_PERMUTE: {
6529
0
            if (src0_needs_grads) {
6530
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6531
0
                const int axis0 = axes[0] & 0x3;
6532
0
                const int axis1 = axes[1] & 0x3;
6533
0
                const int axis2 = axes[2] & 0x3;
6534
0
                const int axis3 = axes[3] & 0x3;
6535
0
                int axb[4] = {0,0,0,0}; // axes backward
6536
0
                axb[axis0] = 0;
6537
0
                axb[axis1] = 1;
6538
0
                axb[axis2] = 2;
6539
0
                axb[axis3] = 3;
6540
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6541
0
            }
6542
0
        } break;
6543
0
        case GGML_OP_TRANSPOSE: {
6544
0
            if (src0_needs_grads) {
6545
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6546
0
            }
6547
0
        } break;
6548
0
        case GGML_OP_GET_ROWS: {
6549
0
            if (src0_needs_grads) {
6550
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6551
0
            }
6552
0
            if (src1_needs_grads) {
6553
                // noop
6554
0
            }
6555
0
        } break;
6556
0
        case GGML_OP_DIAG_MASK_INF: {
6557
0
            if (src0_needs_grads) {
6558
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6559
                /* ref:  https://github.com/ggerganov/llama.cpp/pull/4203#discussion_r1412377992 */
6560
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6561
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6562
0
            }
6563
0
        } break;
6564
0
        case GGML_OP_DIAG_MASK_ZERO: {
6565
0
            if (src0_needs_grads) {
6566
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6567
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6568
0
            }
6569
0
        } break;
6570
0
        case GGML_OP_SOFT_MAX: {
6571
0
            if (src0_needs_grads) {
6572
0
                float scale    = 1.0f;
6573
0
                float max_bias = 0.0f;
6574
6575
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6576
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6577
6578
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6579
0
            }
6580
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6581
0
        } break;
6582
0
        case GGML_OP_ROPE: {
6583
0
            if (src0_needs_grads) {
6584
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6585
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6586
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6587
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6588
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6589
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6590
0
                int sections[4] = {0, 0, 0, 0};
6591
6592
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6593
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6594
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6595
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6596
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6597
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6598
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6599
6600
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6601
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6602
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6603
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6604
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6605
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6606
0
            }
6607
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6608
0
        } break;
6609
0
        case GGML_OP_IM2COL: {
6610
0
            if (src1_needs_grads) {
6611
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6612
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6613
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6614
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6615
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6616
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6617
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6618
6619
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6620
0
            }
6621
0
        } break;
6622
0
        case GGML_OP_POOL_2D: {
6623
0
            if (src0_needs_grads) {
6624
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6625
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6626
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6627
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6628
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6629
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6630
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6631
6632
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6633
0
            }
6634
0
        } break;
6635
0
        case GGML_OP_WIN_PART:
6636
0
        case GGML_OP_WIN_UNPART:
6637
0
        case GGML_OP_UNARY: {
6638
0
            switch (ggml_get_unary_op(tensor)) {
6639
0
                case GGML_UNARY_OP_ABS: {
6640
0
                    if (src0_needs_grads) {
6641
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6642
0
                    }
6643
0
                } break;
6644
0
                case GGML_UNARY_OP_SGN: {
6645
                    // noop
6646
0
                } break;
6647
0
                case GGML_UNARY_OP_NEG: {
6648
0
                    if (src0_needs_grads) {
6649
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6650
0
                    }
6651
0
                } break;
6652
0
                case GGML_UNARY_OP_STEP: {
6653
                    // noop
6654
0
                } break;
6655
0
                case GGML_UNARY_OP_RELU: {
6656
0
                    if (src0_needs_grads) {
6657
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6658
0
                    }
6659
0
                } break;
6660
0
                case GGML_UNARY_OP_SILU: {
6661
0
                    if (src0_needs_grads) {
6662
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6663
0
                    }
6664
0
                } break;
6665
0
                case GGML_UNARY_OP_EXP: {
6666
0
                    if (src0_needs_grads) {
6667
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6668
0
                    }
6669
0
                } break;
6670
0
                case GGML_UNARY_OP_EXPM1: {
6671
0
                    if (src0_needs_grads) {
6672
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6673
0
                    }
6674
0
                } break;
6675
0
                case GGML_UNARY_OP_SOFTPLUS: {
6676
0
                    if (src0_needs_grads) {
6677
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6678
0
                    }
6679
0
                } break;
6680
0
                default: {
6681
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6682
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6683
0
                    GGML_ABORT("fatal error");
6684
0
                } //break;
6685
0
            }
6686
0
        } break;
6687
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6688
0
            if (src0_needs_grads) {
6689
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6690
0
            }
6691
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6692
0
        } break;
6693
0
        case GGML_OP_GLU: {
6694
0
            switch (ggml_get_glu_op(tensor)) {
6695
0
                case GGML_GLU_OP_SWIGLU: {
6696
0
                    if (src0_needs_grads) {
6697
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6698
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6699
0
                    }
6700
0
                    if (src1_needs_grads) {
6701
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6702
0
                    }
6703
0
                } break;
6704
0
                default: {
6705
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6706
0
                } //break;
6707
0
            }
6708
0
        } break;
6709
0
        case GGML_OP_NONE: {
6710
            // noop
6711
0
        } break;
6712
0
        case GGML_OP_COUNT:
6713
0
        default: {
6714
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6715
0
        } //break;
6716
0
    }
6717
6718
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6719
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6720
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6721
0
}
6722
6723
0
static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) {
6724
    // check if already visited
6725
0
    size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6726
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6727
0
    if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6728
        // This is the first time we see this node in the current graph.
6729
0
        cgraph->visited_hash_set.keys[node_hash_pos] = node;
6730
0
        ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6731
0
        cgraph->use_counts[node_hash_pos] = 0;
6732
0
    } else {
6733
        // already visited
6734
0
        return node_hash_pos;
6735
0
    }
6736
6737
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6738
0
        const int k =
6739
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6740
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6741
0
            /* unknown order, just fall back to using i */ i;
6742
6743
0
        struct ggml_tensor * src = node->src[k];
6744
0
        if (src) {
6745
0
            size_t src_hash_pos = ggml_visit_parents(cgraph, src);
6746
6747
            // Update the use count for this operand.
6748
0
            cgraph->use_counts[src_hash_pos]++;
6749
0
        }
6750
0
    }
6751
6752
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6753
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6754
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6755
6756
0
        if (strlen(node->name) == 0) {
6757
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6758
0
        }
6759
6760
0
        cgraph->leafs[cgraph->n_leafs] = node;
6761
0
        cgraph->n_leafs++;
6762
0
    } else {
6763
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6764
6765
0
        if (strlen(node->name) == 0) {
6766
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6767
0
        }
6768
6769
0
        cgraph->nodes[cgraph->n_nodes] = node;
6770
0
        cgraph->n_nodes++;
6771
0
    }
6772
6773
0
    return node_hash_pos;
6774
0
}
6775
6776
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) {
6777
0
    if (!expand) {
6778
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6779
0
        ggml_graph_clear(cgraph);
6780
0
    }
6781
6782
0
    const int n0 = cgraph->n_nodes;
6783
6784
0
    ggml_visit_parents(cgraph, tensor);
6785
6786
0
    const int n_new = cgraph->n_nodes - n0;
6787
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6788
6789
0
    if (n_new > 0) {
6790
        // the last added node should always be starting point
6791
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6792
0
    }
6793
0
}
6794
6795
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6796
0
    ggml_build_forward_impl(cgraph, tensor, true);
6797
0
}
6798
6799
void ggml_build_backward_expand(
6800
        struct ggml_context *  ctx,
6801
        struct ggml_cgraph  *  cgraph,
6802
0
        struct ggml_tensor  ** grad_accs) {
6803
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6804
0
    GGML_ASSERT(cgraph->grads);
6805
0
    GGML_ASSERT(cgraph->grad_accs);
6806
6807
0
    const int n_nodes_f = cgraph->n_nodes;
6808
6809
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6810
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6811
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6812
6813
0
    {
6814
0
        bool any_params = false;
6815
0
        bool any_loss   = false;
6816
0
        for (int i = 0; i < n_nodes_f; ++i) {
6817
0
            struct ggml_tensor * node = cgraph->nodes[i];
6818
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6819
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6820
0
        }
6821
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6822
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6823
0
    }
6824
6825
0
    for (int i = 0; i < n_nodes_f; ++i) {
6826
0
        struct ggml_tensor * node = cgraph->nodes[i];
6827
6828
0
        if (node->type == GGML_TYPE_I32) {
6829
0
            continue;
6830
0
        }
6831
6832
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6833
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6834
0
        switch (node->op) {
6835
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6836
0
            case GGML_OP_IM2COL:      // only used for its shape
6837
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6838
0
                ignore_src[0] = true;
6839
0
                break;
6840
0
            case GGML_OP_UNARY: {
6841
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6842
                // SGN and STEP unary ops are piecewise constant
6843
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6844
0
                    ignore_src[0] = true;
6845
0
                }
6846
0
            } break;
6847
6848
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6849
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6850
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6851
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6852
0
            case GGML_OP_ROPE:          // positions not differentiable
6853
0
                ignore_src[1] = true;
6854
0
                break;
6855
6856
0
            default:
6857
0
                break;
6858
0
        }
6859
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6860
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6861
0
                continue;
6862
0
            }
6863
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6864
0
            node_needs_grad = true;
6865
0
            break;
6866
0
        }
6867
0
        if (!node_needs_grad) {
6868
0
            continue;
6869
0
        }
6870
6871
        // inplace operations are currently not supported
6872
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6873
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6874
6875
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6876
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6877
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6878
0
        if (grad_accs && grad_accs[i]) {
6879
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6880
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6881
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6882
            // loss tensors always need a gradient accumulator
6883
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6884
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6885
0
        }
6886
0
        grads_needed[ihash] = true;
6887
0
    }
6888
6889
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6890
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6891
        // use allocator to automatically make inplace operations
6892
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6893
0
    }
6894
6895
0
    free(grads_needed);
6896
0
}
6897
6898
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6899
0
    void * ptr = *p;
6900
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6901
0
    *p = (void *) ((char *) ptr + size);
6902
0
    return ptr;
6903
0
}
6904
6905
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6906
0
    size_t hash_size = ggml_hash_size(size * 2);
6907
0
    void * p = 0;
6908
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6909
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6910
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6911
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6912
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6913
0
    if (grads) {
6914
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6915
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6916
0
    }
6917
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6918
6919
0
    size_t nbytes = (size_t) p;
6920
0
    return nbytes;
6921
0
}
6922
6923
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6924
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6925
0
}
6926
6927
0
size_t ggml_graph_overhead(void) {
6928
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6929
0
}
6930
6931
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6932
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6933
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6934
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6935
6936
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6937
0
    size_t hash_size = ggml_hash_size(size * 2);
6938
6939
0
    void * p = cgraph + 1;
6940
6941
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6942
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6943
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
6944
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
6945
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6946
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
6947
6948
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6949
6950
    // check that we allocated the correct amount of memory
6951
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
6952
6953
0
    *cgraph = (struct ggml_cgraph) {
6954
0
        /*.size         =*/ size,
6955
0
        /*.n_nodes      =*/ 0,
6956
0
        /*.n_leafs      =*/ 0,
6957
0
        /*.nodes        =*/ nodes_ptr,
6958
0
        /*.grads        =*/ grads_ptr,
6959
0
        /*.grad_accs    =*/ grad_accs_ptr,
6960
0
        /*.leafs        =*/ leafs_ptr,
6961
0
        /*.use_counts   =*/ use_counts_ptr,
6962
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
6963
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
6964
0
    };
6965
6966
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
6967
0
    if (grads) {
6968
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
6969
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
6970
0
    }
6971
6972
0
    return cgraph;
6973
0
}
6974
6975
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
6976
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
6977
0
}
6978
6979
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
6980
0
    struct ggml_cgraph cgraph = {
6981
0
        /*.size             =*/ 0,
6982
0
        /*.n_nodes          =*/ i1 - i0,
6983
0
        /*.n_leafs          =*/ 0,
6984
0
        /*.nodes            =*/ cgraph0->nodes + i0,
6985
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
6986
0
        /*.grad_accs        =*/ NULL,
6987
0
        /*.leafs            =*/ NULL,
6988
0
        /*.use_counts       =*/ cgraph0->use_counts,
6989
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
6990
0
        /*.order            =*/ cgraph0->order,
6991
0
    };
6992
6993
0
    return cgraph;
6994
0
}
6995
6996
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
6997
0
    GGML_ASSERT(dst->size >= src->n_leafs);
6998
0
    GGML_ASSERT(dst->size >= src->n_nodes);
6999
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7000
7001
0
    dst->n_leafs = src->n_leafs;
7002
0
    dst->n_nodes = src->n_nodes;
7003
0
    dst->order   = src->order;
7004
7005
0
    for (int i = 0; i < src->n_leafs; ++i) {
7006
0
        dst->leafs[i] = src->leafs[i];
7007
0
    }
7008
7009
0
    for (int i = 0; i < src->n_nodes; ++i) {
7010
0
        dst->nodes[i] = src->nodes[i];
7011
0
    }
7012
7013
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7014
        // copy all hashset keys (tensors) that are in use
7015
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7016
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7017
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7018
0
        }
7019
0
    }
7020
7021
0
    if (dst->grads) {
7022
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7023
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7024
0
    }
7025
0
    if (src->grads) {
7026
0
        GGML_ASSERT(dst->grads     != NULL);
7027
0
        GGML_ASSERT(dst->grad_accs != NULL);
7028
0
        for (int i = 0; i < src->n_nodes; ++i) {
7029
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7030
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7031
7032
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7033
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7034
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7035
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7036
7037
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7038
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7039
0
        }
7040
0
    }
7041
0
}
7042
7043
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7044
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7045
0
    ggml_graph_cpy(cgraph, result);
7046
0
    return result;
7047
0
}
7048
7049
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7050
0
    if (ggml_is_empty(tensor)) {
7051
0
        return tensor;
7052
0
    }
7053
0
    if (tensor->buffer) {
7054
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7055
0
    } else {
7056
0
        GGML_ASSERT(tensor->data);
7057
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7058
0
    }
7059
0
    return tensor;
7060
0
}
7061
7062
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7063
0
    if (!cgraph) {
7064
0
        return;
7065
0
    }
7066
0
    GGML_ASSERT(cgraph->grads != NULL);
7067
7068
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7069
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7070
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7071
7072
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7073
            // clear momenta
7074
0
            ggml_set_zero(node->src[2]);
7075
0
            ggml_set_zero(node->src[3]);
7076
0
        }
7077
7078
        // initial gradients of loss should be 1, 0 otherwise
7079
0
        if (grad_acc) {
7080
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7081
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7082
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7083
7084
0
                const float onef = 1.0f;
7085
0
                if (grad_acc->buffer) {
7086
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7087
0
                } else {
7088
0
                    GGML_ASSERT(grad_acc->data);
7089
0
                    *((float *) grad_acc->data) = onef;
7090
0
                }
7091
0
            } else {
7092
0
                ggml_set_zero(grad_acc);
7093
0
            }
7094
0
        }
7095
0
    }
7096
0
}
7097
7098
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7099
0
    cgraph->n_leafs = 0;
7100
0
    cgraph->n_nodes = 0;
7101
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7102
0
}
7103
7104
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7105
0
    return cgraph->size;
7106
0
}
7107
7108
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7109
0
    if (i < 0) {
7110
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7111
0
        return cgraph->nodes[cgraph->n_nodes + i];
7112
0
    }
7113
7114
0
    GGML_ASSERT(i < cgraph->n_nodes);
7115
0
    return cgraph->nodes[i];
7116
0
}
7117
7118
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7119
0
    return cgraph->nodes;
7120
0
}
7121
7122
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7123
0
    return cgraph->n_nodes;
7124
0
}
7125
7126
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7127
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7128
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7129
0
    cgraph->n_nodes++;
7130
0
}
7131
7132
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7133
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7134
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7135
7136
0
        if (strcmp(leaf->name, name) == 0) {
7137
0
            return leaf;
7138
0
        }
7139
0
    }
7140
7141
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7142
0
        struct ggml_tensor * node = cgraph->nodes[i];
7143
7144
0
        if (strcmp(node->name, name) == 0) {
7145
0
            return node;
7146
0
        }
7147
0
    }
7148
7149
0
    return NULL;
7150
0
}
7151
7152
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7153
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7154
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7155
0
}
7156
7157
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7158
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7159
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7160
0
}
7161
7162
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7163
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7164
7165
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7166
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7167
0
        struct ggml_tensor * node = cgraph->nodes[i];
7168
7169
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7170
0
                i,
7171
0
                node->ne[0], node->ne[1], node->ne[2],
7172
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7173
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7174
0
    }
7175
7176
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7177
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7178
0
        struct ggml_tensor * node = cgraph->leafs[i];
7179
7180
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7181
0
                i,
7182
0
                node->ne[0], node->ne[1],
7183
0
                ggml_op_name(node->op),
7184
0
                ggml_get_name(node));
7185
0
    }
7186
7187
0
    GGML_LOG_INFO("========================================\n");
7188
0
}
7189
7190
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7191
                                      const int *                idxs,
7192
                                      int                        count,
7193
0
                                      const struct ggml_tensor * tensor) {
7194
0
    GGML_ASSERT(cgraph && idxs);
7195
0
    for (int i = 0; i < count; ++i) {
7196
0
        const int node_idx = idxs[i];
7197
7198
0
        if (node_idx >= cgraph->n_nodes) {
7199
0
            return -1;
7200
0
        }
7201
0
        if (cgraph->nodes[node_idx] == tensor) {
7202
0
            return i;
7203
0
        }
7204
0
    }
7205
0
    return -1;
7206
0
}
7207
7208
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7209
                                const int *                node_idxs,
7210
                                int                        count,
7211
                                const enum ggml_op *       ops,
7212
                                const int *                outputs,
7213
0
                                int                        num_outputs) {
7214
0
    GGML_ASSERT(outputs && num_outputs > 0);
7215
7216
0
    for (int i = 0; i < count; ++i) {
7217
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7218
0
            return false;
7219
0
        }
7220
7221
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7222
7223
0
        if (node->op != ops[i]) {
7224
0
            return false;
7225
0
        }
7226
7227
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7228
0
            continue;
7229
0
        }
7230
7231
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7232
0
            return false;
7233
0
        }
7234
7235
0
        int subgraph_uses = 0;
7236
0
        for (int j = i + 1; j < count; ++j) {
7237
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7238
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7239
0
                if (other_node->src[src_idx] == node) {
7240
0
                    subgraph_uses++;
7241
0
                }
7242
0
            }
7243
0
        }
7244
7245
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7246
0
            return false;
7247
0
        }
7248
7249
        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7250
0
        struct ggml_tensor * view_src = node->view_src;
7251
0
        while (view_src) {
7252
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7253
0
                return false;
7254
0
            }
7255
0
            view_src = view_src->view_src;
7256
0
        }
7257
0
    }
7258
7259
0
    return true;
7260
0
}
7261
7262
// check if node is part of the graph
7263
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7264
0
    if (cgraph == NULL) {
7265
0
        return true;
7266
0
    }
7267
7268
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7269
0
        if (cgraph->nodes[i] == node) {
7270
0
            return true;
7271
0
        }
7272
0
    }
7273
7274
0
    return false;
7275
0
}
7276
7277
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7278
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7279
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7280
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7281
7282
0
        if (grad == node) {
7283
0
            return parent;
7284
0
        }
7285
0
    }
7286
7287
0
    return NULL;
7288
0
}
7289
7290
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7291
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7292
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7293
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7294
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7295
0
            gparent ? (void *) gparent : (void *) node,
7296
0
            gparent ? "empty" : "vee",
7297
0
            gparent ? "dashed" : "solid",
7298
0
            label);
7299
0
}
7300
7301
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7302
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7303
0
            (void *) parent,
7304
0
            (void *) node,
7305
0
            label);
7306
0
}
7307
7308
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * gf, const char * filename) {
7309
0
    char color[16];
7310
7311
0
    FILE * fp = ggml_fopen(filename, "w");
7312
0
    GGML_ASSERT(fp);
7313
7314
0
    fprintf(fp, "digraph G {\n");
7315
0
    fprintf(fp, "  newrank = true;\n");
7316
0
    fprintf(fp, "  rankdir = TB;\n");
7317
7318
0
    for (int i = 0; i < gb->n_nodes; i++) {
7319
0
        struct ggml_tensor * node = gb->nodes[i];
7320
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7321
7322
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7323
0
            continue;
7324
0
        }
7325
7326
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7327
0
            snprintf(color, sizeof(color), "yellow");
7328
0
        } else if (grad) {
7329
0
            if (ggml_graph_find(gf, node)) {
7330
0
                snprintf(color, sizeof(color), "green");
7331
0
            } else {
7332
0
                snprintf(color, sizeof(color), "lightblue");
7333
0
            }
7334
0
        } else {
7335
0
            snprintf(color, sizeof(color), "white");
7336
0
        }
7337
7338
0
        fprintf(fp, "  \"%p\" [ "
7339
0
                    "style = filled; fillcolor = %s; shape = record; "
7340
0
                    "label=\"",
7341
0
                (void *) node, color);
7342
7343
0
        if (strlen(node->name) > 0) {
7344
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7345
0
        } else {
7346
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7347
0
        }
7348
7349
0
        if (ggml_is_matrix(node)) {
7350
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7351
0
        } else {
7352
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7353
0
        }
7354
7355
0
        if (grad) {
7356
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7357
0
        } else {
7358
0
            fprintf(fp, "\"; ]\n");
7359
0
        }
7360
0
    }
7361
7362
0
    for (int i = 0; i < gb->n_leafs; i++) {
7363
0
        struct ggml_tensor * node = gb->leafs[i];
7364
7365
0
        snprintf(color, sizeof(color), "pink");
7366
7367
0
        fprintf(fp, "  \"%p\" [ "
7368
0
                    "style = filled; fillcolor = %s; shape = record; "
7369
0
                    "label=\"<x>",
7370
0
                (void *) node, color);
7371
7372
0
        if (strlen(node->name) > 0) {
7373
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7374
0
        } else {
7375
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7376
0
        }
7377
7378
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7379
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7380
0
            fprintf(fp, " | (");
7381
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7382
                // FIXME: use ggml-backend to obtain the tensor data
7383
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7384
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7385
                //}
7386
                //else if (node->type == GGML_TYPE_F32 ||
7387
                //         node->type == GGML_TYPE_F16 ||
7388
                //         node->type == GGML_TYPE_BF16) {
7389
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7390
                //}
7391
                //else
7392
0
                {
7393
0
                    fprintf(fp, "#");
7394
0
                }
7395
0
                if (j < ggml_nelements(node) - 1) {
7396
0
                    fprintf(fp, ", ");
7397
0
                }
7398
0
            }
7399
0
            fprintf(fp, ")");
7400
0
        }
7401
0
        fprintf(fp, "\"; ]\n");
7402
0
    }
7403
7404
0
    for (int i = 0; i < gb->n_nodes; i++) {
7405
0
        struct ggml_tensor * node = gb->nodes[i];
7406
7407
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7408
0
            if (node->src[j]) {
7409
0
                char label[16];
7410
0
                snprintf(label, sizeof(label), "src %d", j);
7411
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7412
0
            }
7413
0
        }
7414
0
    }
7415
7416
0
    for (int i = 0; i < gb->n_leafs; i++) {
7417
0
        struct ggml_tensor * node = gb->leafs[i];
7418
7419
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7420
0
            if (node->src[j]) {
7421
0
                char label[16];
7422
0
                snprintf(label, sizeof(label), "src %d", j);
7423
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7424
0
            }
7425
0
        }
7426
0
    }
7427
7428
0
    fprintf(fp, "}\n");
7429
7430
0
    fclose(fp);
7431
7432
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7433
0
}
7434
7435
////////////////////////////////////////////////////////////////////////////////
7436
7437
0
void ggml_set_input(struct ggml_tensor * tensor) {
7438
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7439
0
}
7440
7441
0
void ggml_set_output(struct ggml_tensor * tensor) {
7442
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7443
0
}
7444
7445
0
void ggml_set_param(struct ggml_tensor * tensor) {
7446
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7447
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7448
0
}
7449
7450
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7451
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7452
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7453
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7454
0
}
7455
7456
////////////////////////////////////////////////////////////////////////////////
7457
7458
0
void ggml_quantize_init(enum ggml_type type) {
7459
0
    ggml_critical_section_start();
7460
7461
0
    switch (type) {
7462
0
        case GGML_TYPE_IQ2_XXS:
7463
0
        case GGML_TYPE_IQ2_XS:
7464
0
        case GGML_TYPE_IQ2_S:
7465
0
        case GGML_TYPE_IQ1_S:
7466
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7467
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7468
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7469
0
        default: // nothing
7470
0
            break;
7471
0
    }
7472
7473
0
    ggml_critical_section_end();
7474
0
}
7475
7476
1.04k
void ggml_quantize_free(void) {
7477
1.04k
    ggml_critical_section_start();
7478
7479
1.04k
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7480
1.04k
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7481
1.04k
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7482
1.04k
    iq3xs_free_impl(256);
7483
7484
1.04k
    ggml_critical_section_end();
7485
1.04k
}
7486
7487
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7488
0
    return
7489
0
        type == GGML_TYPE_IQ2_XXS ||
7490
0
        type == GGML_TYPE_IQ2_XS  ||
7491
0
        type == GGML_TYPE_IQ1_S;//   ||
7492
        //type == GGML_TYPE_IQ1_M;
7493
0
}
7494
7495
size_t ggml_quantize_chunk(
7496
        enum ggml_type   type,
7497
           const float * src,
7498
                  void * dst,
7499
               int64_t   start,
7500
               int64_t   nrows,
7501
               int64_t   n_per_row,
7502
0
           const float * imatrix) {
7503
0
    const int64_t n = (int64_t) nrows * n_per_row;
7504
7505
0
    if (ggml_quantize_requires_imatrix(type)) {
7506
0
        GGML_ASSERT(imatrix != NULL);
7507
0
    }
7508
7509
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7510
0
    GGML_ASSERT(start % n_per_row == 0);
7511
7512
0
    ggml_quantize_init(type); // this is noop if already initialized
7513
7514
0
    const size_t start_row = start / n_per_row;
7515
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7516
7517
0
    size_t result = 0;
7518
7519
0
    switch (type) {
7520
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7521
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7522
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7523
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7524
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7525
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7526
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7527
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7528
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7529
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7530
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7531
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7532
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7533
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7534
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7535
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7536
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7537
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7538
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7539
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7540
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7541
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7542
0
        case GGML_TYPE_F16:
7543
0
            {
7544
0
                size_t elemsize = sizeof(ggml_fp16_t);
7545
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7546
0
                result = n * elemsize;
7547
0
            } break;
7548
0
        case GGML_TYPE_BF16:
7549
0
            {
7550
0
                size_t elemsize = sizeof(ggml_bf16_t);
7551
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7552
0
                result = n * elemsize;
7553
0
            } break;
7554
0
        case GGML_TYPE_F32:
7555
0
            {
7556
0
                size_t elemsize = sizeof(float);
7557
0
                result = n * elemsize;
7558
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7559
0
            } break;
7560
0
        default:
7561
0
            assert(false);
7562
0
    }
7563
7564
0
    GGML_ASSERT(result == nrows * row_size);
7565
7566
0
    return result;
7567
0
}
7568
7569
////////////////////////////////////////////////////////////////////////////////
7570
7571
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7572
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7573
0
    g_logger_state.log_callback_user_data = user_data;
7574
0
}
7575
7576
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7577
0
    p->n_threads  = n_threads;
7578
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7579
0
    p->poll       = 50;    // hybrid-polling enabled
7580
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7581
0
    p->paused     = false; // threads are ready to go
7582
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7583
0
}
7584
7585
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7586
0
    struct ggml_threadpool_params p;
7587
0
    ggml_threadpool_params_init(&p, n_threads);
7588
0
    return p;
7589
0
}
7590
7591
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7592
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7593
0
    if (p0->prio           != p1->prio       )    return false;
7594
0
    if (p0->poll           != p1->poll       )    return false;
7595
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7596
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7597
0
}