Coverage Report

Created: 2026-03-07 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml.c
Line
Count
Source
1
#define _CRT_SECURE_NO_DEPRECATE // Disables "unsafe" warnings on Windows
2
#define _USE_MATH_DEFINES // For M_PI on MSVC
3
4
#include "ggml-backend.h"
5
#include "ggml-impl.h"
6
#include "ggml-threading.h"
7
#include "ggml-cpu.h"
8
#include "ggml.h"
9
10
// FIXME: required here for quantization functions
11
#include "ggml-quants.h"
12
13
#ifdef GGML_USE_CPU_HBM
14
#include <hbwmalloc.h>
15
#endif
16
17
#if defined(_MSC_VER) || defined(__MINGW32__)
18
#include <malloc.h> // using malloc.h with MSC/MINGW
19
#elif !defined(__FreeBSD__) && !defined(__NetBSD__) && !defined(__OpenBSD__)
20
#include <alloca.h>
21
#endif
22
23
#include <assert.h>
24
#include <errno.h>
25
#include <time.h>
26
#include <math.h>
27
#include <stdlib.h>
28
#include <string.h>
29
#include <stdint.h>
30
#include <inttypes.h>
31
#include <stdio.h>
32
#include <float.h>
33
#include <limits.h>
34
#include <stdarg.h>
35
#include <signal.h>
36
#if defined(__gnu_linux__)
37
#include <syscall.h>
38
#endif
39
40
#if defined(__APPLE__)
41
#include <unistd.h>
42
#include <mach/mach.h>
43
#include <TargetConditionals.h>
44
#endif
45
46
#if defined(_WIN32)
47
#define WIN32_LEAN_AND_MEAN
48
#ifndef NOMINMAX
49
    #define NOMINMAX
50
#endif
51
#include <windows.h>
52
#endif
53
54
0
#define UNUSED GGML_UNUSED
55
56
// Needed for ggml_fp32_to_bf16_row()
57
#if defined(__AVX512BF16__)
58
#if defined(_MSC_VER)
59
#define m512i(p) p
60
#else
61
#include <immintrin.h>
62
#define m512i(p) (__m512i)(p)
63
#endif // defined(_MSC_VER)
64
#endif // defined(__AVX512BF16__)
65
66
#if defined(__linux__) || \
67
    defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
68
    (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH)
69
70
#include <unistd.h>
71
#include <sys/types.h>
72
#include <sys/stat.h>
73
#include <sys/wait.h>
74
#if defined(__linux__)
75
#include <sys/prctl.h>
76
#endif
77
78
#if defined(__ANDROID__)
79
#include <unwind.h>
80
#include <dlfcn.h>
81
#include <stdio.h>
82
83
struct backtrace_state {
84
    void ** current;
85
    void ** end;
86
};
87
88
static _Unwind_Reason_Code unwind_callback(struct _Unwind_Context* context, void* arg) {
89
    struct backtrace_state * state = (struct backtrace_state *)arg;
90
    uintptr_t pc = _Unwind_GetIP(context);
91
    if (pc) {
92
        if (state->current == state->end) {
93
            return _URC_END_OF_STACK;
94
        } else {
95
            *state->current++ = (void*)pc;
96
        }
97
    }
98
    return _URC_NO_REASON;
99
}
100
101
static void ggml_print_backtrace_symbols(void) {
102
    const int max = 100;
103
    void* buffer[max];
104
105
    struct backtrace_state state = {buffer, buffer + max};
106
    _Unwind_Backtrace(unwind_callback, &state);
107
108
    int count = state.current - buffer;
109
110
    for (int idx = 0; idx < count; ++idx) {
111
        const void * addr = buffer[idx];
112
        const char * symbol = "";
113
114
        Dl_info info;
115
        if (dladdr(addr, &info) && info.dli_sname) {
116
            symbol = info.dli_sname;
117
        }
118
119
        fprintf(stderr, "%d: %p %s\n", idx, addr, symbol);
120
    }
121
}
122
#elif defined(__linux__) && defined(__GLIBC__)
123
#include <execinfo.h>
124
0
static void ggml_print_backtrace_symbols(void) {
125
0
    void * trace[100];
126
0
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
127
0
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
128
0
}
129
#elif defined(__APPLE__)
130
#include <execinfo.h>
131
static void ggml_print_backtrace_symbols(void) {
132
    void * trace[100];
133
    int nptrs = backtrace(trace, sizeof(trace)/sizeof(trace[0]));
134
    backtrace_symbols_fd(trace, nptrs, STDERR_FILENO);
135
}
136
#else
137
static void ggml_print_backtrace_symbols(void) {
138
    // platform not supported
139
}
140
#endif
141
142
0
void ggml_print_backtrace(void) {
143
0
    const char * GGML_NO_BACKTRACE = getenv("GGML_NO_BACKTRACE");
144
0
    if (GGML_NO_BACKTRACE) {
145
0
        return;
146
0
    }
147
#if defined(__APPLE__)
148
    // On macOS, fork+debugger attachment is problematic due to:
149
    // 1. libdispatch "poisons" forked child processes
150
    // 2. lldb has issues attaching to parent from forked child
151
    // Use simple backtrace() instead to avoid Terminal.app crashes
152
    const char * GGML_BACKTRACE_LLDB = getenv("GGML_BACKTRACE_LLDB");
153
    if (!GGML_BACKTRACE_LLDB) {
154
        fprintf(stderr, "WARNING: Using native backtrace. Set GGML_BACKTRACE_LLDB for more info.\n");
155
        fprintf(stderr, "WARNING: GGML_BACKTRACE_LLDB may cause native MacOS Terminal.app to crash.\n");
156
        fprintf(stderr, "See: https://github.com/ggml-org/llama.cpp/pull/17869\n");
157
        ggml_print_backtrace_symbols();
158
        return;
159
    }
160
#endif
161
0
#if defined(__linux__)
162
0
    FILE * f = fopen("/proc/self/status", "r");
163
0
    size_t size = 0;
164
0
    char * line = NULL;
165
0
    ssize_t length = 0;
166
0
    while ((length = getline(&line, &size, f)) > 0) {
167
0
        if (!strncmp(line, "TracerPid:", sizeof("TracerPid:") - 1) &&
168
0
            (length != sizeof("TracerPid:\t0\n") - 1 || line[length - 2] != '0')) {
169
            // Already being debugged, and the breakpoint is the later abort()
170
0
            free(line);
171
0
            fclose(f);
172
0
            return;
173
0
        }
174
0
    }
175
0
    free(line);
176
0
    fclose(f);
177
0
    int lock[2] = { -1, -1 };
178
0
    (void) !pipe(lock); // Don't start gdb until after PR_SET_PTRACER
179
0
#endif
180
0
    const int parent_pid = getpid();
181
0
    const int child_pid = fork();
182
0
    if (child_pid < 0) { // error
183
0
#if defined(__linux__)
184
0
        close(lock[1]);
185
0
        close(lock[0]);
186
0
#endif
187
0
        return;
188
0
    } else if (child_pid == 0) { // child
189
0
        char attach[32];
190
0
        snprintf(attach, sizeof(attach), "attach %d", parent_pid);
191
0
#if defined(__linux__)
192
0
        close(lock[1]);
193
0
        (void) !read(lock[0], lock, 1);
194
0
        close(lock[0]);
195
0
#endif
196
        // try gdb
197
0
        execlp("gdb", "gdb", "--batch",
198
0
            "-ex", "set style enabled on",
199
0
            "-ex", attach,
200
0
            "-ex", "bt -frame-info source-and-location",
201
0
            "-ex", "detach",
202
0
            "-ex", "quit",
203
0
            (char *) NULL);
204
        // try lldb
205
0
        execlp("lldb", "lldb", "--batch",
206
0
            "-o", "bt",
207
0
            "-o", "quit",
208
0
            "-p", &attach[sizeof("attach ") - 1],
209
0
            (char *) NULL);
210
        // gdb failed, fallback to backtrace_symbols
211
0
        ggml_print_backtrace_symbols();
212
0
        _Exit(0);
213
0
    } else { // parent
214
0
#if defined(__linux__)
215
0
        prctl(PR_SET_PTRACER, child_pid);
216
0
        close(lock[1]);
217
0
        close(lock[0]);
218
0
#endif
219
0
        waitpid(child_pid, NULL, 0);
220
0
    }
221
0
}
222
#else
223
void ggml_print_backtrace(void) {
224
    // platform not supported
225
}
226
#endif
227
228
static ggml_abort_callback_t g_abort_callback = NULL;
229
230
// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout)
231
0
GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) {
232
0
    ggml_abort_callback_t ret_val = g_abort_callback;
233
0
    g_abort_callback = callback;
234
0
    return ret_val;
235
0
}
236
237
235
void ggml_abort(const char * file, int line, const char * fmt, ...) {
238
235
    fflush(stdout);
239
240
235
    char message[2048];
241
235
    int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line);
242
243
235
    va_list args;
244
235
    va_start(args, fmt);
245
235
    vsnprintf(message + offset, sizeof(message) - offset, fmt, args);
246
235
    va_end(args);
247
248
235
    if (g_abort_callback) {
249
0
        g_abort_callback(message);
250
235
    } else {
251
        // default: print error and backtrace to stderr
252
235
        fprintf(stderr, "%s\n", message);
253
        
254
235
    }
255
256
235
    abort();
257
235
}
258
259
// ggml_print_backtrace is registered with std::set_terminate by ggml.cpp
260
261
//
262
// logging
263
//
264
265
struct ggml_logger_state {
266
    ggml_log_callback log_callback;
267
    void * log_callback_user_data;
268
};
269
static struct ggml_logger_state g_logger_state = {ggml_log_callback_default, NULL};
270
271
4.94k
static void ggml_log_internal_v(enum ggml_log_level level, const char * format, va_list args) {
272
4.94k
    if (format == NULL) {
273
0
        return;
274
0
    }
275
4.94k
    va_list args_copy;
276
4.94k
    va_copy(args_copy, args);
277
4.94k
    char buffer[128];
278
4.94k
    int len = vsnprintf(buffer, 128, format, args);
279
4.94k
    if (len < 128) {
280
4.84k
        g_logger_state.log_callback(level, buffer, g_logger_state.log_callback_user_data);
281
4.84k
    } else {
282
99
        char * buffer2 = (char *) calloc(len + 1, sizeof(char));
283
99
        vsnprintf(buffer2, len + 1, format, args_copy);
284
99
        buffer2[len] = 0;
285
99
        g_logger_state.log_callback(level, buffer2, g_logger_state.log_callback_user_data);
286
99
        free(buffer2);
287
99
    }
288
4.94k
    va_end(args_copy);
289
4.94k
}
290
291
4.94k
void ggml_log_internal(enum ggml_log_level level, const char * format, ...) {
292
4.94k
    va_list args;
293
4.94k
    va_start(args, format);
294
4.94k
    ggml_log_internal_v(level, format, args);
295
4.94k
    va_end(args);
296
4.94k
}
297
298
4.94k
void ggml_log_callback_default(enum ggml_log_level level, const char * text, void * user_data) {
299
4.94k
    (void) level;
300
4.94k
    (void) user_data;
301
4.94k
    fputs(text, stderr);
302
4.94k
    fflush(stderr);
303
4.94k
}
304
305
//
306
// end of logging block
307
//
308
309
#ifdef GGML_USE_ACCELERATE
310
// uncomment to use vDSP for soft max computation
311
// note: not sure if it is actually faster
312
//#define GGML_SOFT_MAX_ACCELERATE
313
#endif
314
315
316
5.28k
void * ggml_aligned_malloc(size_t size) {
317
#if defined(__s390x__)
318
    const int alignment = 256;
319
#else
320
5.28k
    const int alignment = 64;
321
5.28k
#endif
322
323
#if defined(_MSC_VER) || defined(__MINGW32__)
324
    return _aligned_malloc(size, alignment);
325
#else
326
5.28k
    if (size == 0) {
327
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_aligned_malloc!\n");
328
0
        return NULL;
329
0
    }
330
5.28k
    void * aligned_memory = NULL;
331
  #ifdef GGML_USE_CPU_HBM
332
    int result = hbw_posix_memalign(&aligned_memory, alignment, size);
333
  #elif TARGET_OS_OSX
334
    GGML_UNUSED(alignment);
335
    kern_return_t alloc_status = vm_allocate((vm_map_t) mach_task_self(), (vm_address_t *) &aligned_memory, size, VM_FLAGS_ANYWHERE);
336
    int result = EFAULT;
337
    switch (alloc_status) {
338
        case KERN_SUCCESS:
339
            result = 0;
340
            break;
341
        case KERN_INVALID_ADDRESS:
342
            result = EINVAL;
343
            break;
344
        case KERN_NO_SPACE:
345
            result = ENOMEM;
346
            break;
347
        default:
348
            result = EFAULT;
349
            break;
350
    }
351
  #else
352
5.28k
    int result = posix_memalign(&aligned_memory, alignment, size);
353
5.28k
  #endif
354
5.28k
    if (result != 0) {
355
        // Handle allocation failure
356
0
        const char *error_desc = "unknown allocation error";
357
0
        switch (result) {
358
0
            case EINVAL:
359
0
                error_desc = "invalid alignment value";
360
0
                break;
361
0
            case ENOMEM:
362
0
                error_desc = "insufficient memory";
363
0
                break;
364
0
        }
365
0
        GGML_LOG_ERROR("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
366
0
        return NULL;
367
0
    }
368
5.28k
    return aligned_memory;
369
5.28k
#endif
370
5.28k
}
371
372
5.27k
void ggml_aligned_free(void * ptr, size_t size) {
373
5.27k
    GGML_UNUSED(size);
374
#if defined(_MSC_VER) || defined(__MINGW32__)
375
    _aligned_free(ptr);
376
#elif GGML_USE_CPU_HBM
377
    if (ptr != NULL) {
378
        hbw_free(ptr);
379
    }
380
#elif TARGET_OS_OSX
381
    if (ptr != NULL) {
382
        vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)ptr, size);
383
    }
384
#else
385
5.27k
    free(ptr);
386
5.27k
#endif
387
5.27k
}
388
389
390
5.28k
inline static void * ggml_malloc(size_t size) {
391
5.28k
    if (size == 0) {
392
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_malloc!\n");
393
0
        return NULL;
394
0
    }
395
5.28k
    void * result = malloc(size);
396
5.28k
    if (result == NULL) {
397
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
398
0
        GGML_ABORT("fatal error");
399
0
    }
400
5.28k
    return result;
401
5.28k
}
402
403
// calloc
404
0
inline static void * ggml_calloc(size_t num, size_t size) {
405
0
if ((num * size) > 9000000) {GGML_ABORT("calloc err");}
406
407
0
    if (num == 0 || size == 0) {
408
0
        GGML_LOG_WARN("Behavior may be unexpected when allocating 0 bytes for ggml_calloc!\n");
409
0
        return NULL;
410
0
    }
411
0
    void * result = calloc(num, size);
412
0
    if (result == NULL) {
413
0
        GGML_LOG_ERROR("%s: failed to allocate %6.2f MB\n", __func__, size/(1024.0*1024.0));
414
0
        GGML_ABORT("fatal error");
415
0
    }
416
0
    return result;
417
0
}
418
419
5.28k
#define GGML_MALLOC(size)      ggml_malloc(size)
420
0
#define GGML_CALLOC(num, size) ggml_calloc(num, size)
421
422
5.27k
#define GGML_FREE(ptr) free(ptr)
423
424
0
const char * ggml_status_to_string(enum ggml_status status) {
425
0
    switch (status) {
426
0
        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
427
0
        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
428
0
        case GGML_STATUS_SUCCESS:      return "GGML status: success";
429
0
        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
430
0
    }
431
432
0
    return "GGML status: unknown";
433
0
}
434
435
0
float ggml_fp16_to_fp32(ggml_fp16_t x) {
436
0
#define ggml_fp16_to_fp32 do_not_use__ggml_fp16_to_fp32__in_ggml
437
0
    return GGML_FP16_TO_FP32(x);
438
0
}
439
440
0
ggml_fp16_t ggml_fp32_to_fp16(float x) {
441
0
#define ggml_fp32_to_fp16 do_not_use__ggml_fp32_to_fp16__in_ggml
442
0
    return GGML_FP32_TO_FP16(x);
443
0
}
444
445
0
float ggml_bf16_to_fp32(ggml_bf16_t x) {
446
0
#define ggml_bf16_to_fp32 do_not_use__ggml_bf16_to_fp32__in_ggml
447
0
    return GGML_BF16_TO_FP32(x);  // it just left shifts
448
0
}
449
450
0
ggml_bf16_t ggml_fp32_to_bf16(float x) {
451
0
#define ggml_fp32_to_bf16 do_not_use__ggml_fp32_to_bf16__in_ggml
452
0
    return GGML_FP32_TO_BF16(x);
453
0
}
454
455
0
void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
456
0
    for (int64_t i = 0; i < n; i++) {
457
0
        y[i] = GGML_FP16_TO_FP32(x[i]);
458
0
    }
459
0
}
460
461
0
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
462
0
    int i = 0;
463
0
    for (; i < n; ++i) {
464
0
        y[i] = GGML_FP32_TO_FP16(x[i]);
465
0
    }
466
0
}
467
468
0
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
469
0
    int i = 0;
470
0
    for (; i < n; ++i) {
471
0
        y[i] = GGML_BF16_TO_FP32(x[i]);
472
0
    }
473
0
}
474
475
0
void ggml_fp32_to_bf16_row_ref(const float * x, ggml_bf16_t * y, int64_t n) {
476
0
    for (int i = 0; i < n; i++) {
477
0
        y[i] = ggml_compute_fp32_to_bf16(x[i]);
478
0
    }
479
0
}
480
481
0
void ggml_fp32_to_bf16_row(const float * x, ggml_bf16_t * y, int64_t n) {
482
0
  int i = 0;
483
#if defined(__AVX512BF16__)
484
  // subnormals are flushed to zero on this platform
485
  for (; i + 32 <= n; i += 32) {
486
        _mm512_storeu_si512(
487
            (__m512i *)(y + i),
488
            m512i(_mm512_cvtne2ps_pbh(_mm512_loadu_ps(x + i + 16),
489
                                _mm512_loadu_ps(x + i))));
490
  }
491
#endif
492
0
    for (; i < n; i++) {
493
0
        y[i] = GGML_FP32_TO_BF16(x[i]);
494
0
    }
495
0
}
496
497
0
bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) {
498
0
    return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0;
499
0
}
500
501
0
const char * ggml_version(void) {
502
0
    return GGML_VERSION;
503
0
}
504
505
0
const char * ggml_commit(void) {
506
0
    return GGML_COMMIT;
507
0
}
508
509
//
510
// timing
511
//
512
513
#if defined(_MSC_VER) || defined(__MINGW32__)
514
static int64_t timer_freq, timer_start;
515
void ggml_time_init(void) {
516
    LARGE_INTEGER t;
517
    QueryPerformanceFrequency(&t);
518
    timer_freq = t.QuadPart;
519
520
    // The multiplication by 1000 or 1000000 below can cause an overflow if timer_freq
521
    // and the uptime is high enough.
522
    // We subtract the program start time to reduce the likelihood of that happening.
523
    QueryPerformanceCounter(&t);
524
    timer_start = t.QuadPart;
525
}
526
int64_t ggml_time_ms(void) {
527
    LARGE_INTEGER t;
528
    QueryPerformanceCounter(&t);
529
    return ((t.QuadPart-timer_start) * 1000) / timer_freq;
530
}
531
int64_t ggml_time_us(void) {
532
    LARGE_INTEGER t;
533
    QueryPerformanceCounter(&t);
534
    return ((t.QuadPart-timer_start) * 1000000) / timer_freq;
535
}
536
#else
537
13.9k
void ggml_time_init(void) {}
538
0
int64_t ggml_time_ms(void) {
539
0
    struct timespec ts;
540
0
    clock_gettime(CLOCK_MONOTONIC, &ts);
541
0
    return (int64_t)ts.tv_sec*1000 + (int64_t)ts.tv_nsec/1000000;
542
0
}
543
544
8.38k
int64_t ggml_time_us(void) {
545
8.38k
    struct timespec ts;
546
8.38k
    clock_gettime(CLOCK_MONOTONIC, &ts);
547
8.38k
    return (int64_t)ts.tv_sec*1000000 + (int64_t)ts.tv_nsec/1000;
548
8.38k
}
549
#endif
550
551
0
int64_t ggml_cycles(void) {
552
0
    return clock();
553
0
}
554
555
0
int64_t ggml_cycles_per_ms(void) {
556
0
    return CLOCKS_PER_SEC/1000;
557
0
}
558
559
//
560
// cross-platform UTF-8 file paths
561
//
562
563
#ifdef _WIN32
564
static wchar_t * ggml_mbstowcs(const char * mbs) {
565
    int wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, NULL, 0);
566
    if (!wlen) {
567
        errno = EINVAL;
568
        return NULL;
569
    }
570
571
    wchar_t * wbuf = GGML_MALLOC(wlen * sizeof(wchar_t));
572
    wlen = MultiByteToWideChar(CP_UTF8, 0, mbs, -1, wbuf, wlen);
573
    if (!wlen) {
574
        GGML_FREE(wbuf);
575
        errno = EINVAL;
576
        return NULL;
577
    }
578
579
    return wbuf;
580
}
581
#endif
582
583
5.26k
FILE * ggml_fopen(const char * fname, const char * mode) {
584
#ifdef _WIN32
585
    FILE * file = NULL;
586
587
    // convert fname (UTF-8)
588
    wchar_t * wfname = ggml_mbstowcs(fname);
589
    if (wfname) {
590
        // convert mode (ANSI)
591
        wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
592
        wchar_t * wmode_p = wmode;
593
        do {
594
            *wmode_p++ = (wchar_t)*mode;
595
        } while (*mode++);
596
597
        // open file
598
        file = _wfopen(wfname, wmode);
599
600
        GGML_FREE(wfname);
601
        GGML_FREE(wmode);
602
    }
603
604
    return file;
605
#else
606
5.26k
    return fopen(fname, mode);
607
5.26k
#endif
608
609
5.26k
}
610
611
static const struct ggml_type_traits type_traits[GGML_TYPE_COUNT] = {
612
    [GGML_TYPE_I8] = {
613
        .type_name                = "i8",
614
        .blck_size                = 1,
615
        .type_size                = sizeof(int8_t),
616
        .is_quantized             = false,
617
    },
618
    [GGML_TYPE_I16] = {
619
        .type_name                = "i16",
620
        .blck_size                = 1,
621
        .type_size                = sizeof(int16_t),
622
        .is_quantized             = false,
623
    },
624
    [GGML_TYPE_I32] = {
625
        .type_name                = "i32",
626
        .blck_size                = 1,
627
        .type_size                = sizeof(int32_t),
628
        .is_quantized             = false,
629
    },
630
    [GGML_TYPE_I64] = {
631
        .type_name                = "i64",
632
        .blck_size                = 1,
633
        .type_size                = sizeof(int64_t),
634
        .is_quantized             = false,
635
    },
636
    [GGML_TYPE_F64] = {
637
        .type_name                = "f64",
638
        .blck_size                = 1,
639
        .type_size                = sizeof(double),
640
        .is_quantized             = false,
641
    },
642
    [GGML_TYPE_F32] = {
643
        .type_name                = "f32",
644
        .blck_size                = 1,
645
        .type_size                = sizeof(float),
646
        .is_quantized             = false,
647
    },
648
    [GGML_TYPE_F16] = {
649
        .type_name                = "f16",
650
        .blck_size                = 1,
651
        .type_size                = sizeof(ggml_fp16_t),
652
        .is_quantized             = false,
653
        .to_float                 = (ggml_to_float_t) ggml_fp16_to_fp32_row,
654
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_fp16_row,
655
    },
656
    [GGML_TYPE_Q4_0] = {
657
        .type_name                = "q4_0",
658
        .blck_size                = QK4_0,
659
        .type_size                = sizeof(block_q4_0),
660
        .is_quantized             = true,
661
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_0,
662
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_0_ref,
663
    },
664
    [GGML_TYPE_Q4_1] = {
665
        .type_name                = "q4_1",
666
        .blck_size                = QK4_1,
667
        .type_size                = sizeof(block_q4_1),
668
        .is_quantized             = true,
669
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_1,
670
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_1_ref,
671
    },
672
    [4] = { // GGML_TYPE_Q4_2
673
        .type_name                = "DEPRECATED",
674
        .blck_size                = 0,
675
        .type_size                = 0,
676
        .is_quantized             = false,
677
    },
678
    [5] = { // GGML_TYPE_Q4_3
679
        .type_name                = "DEPRECATED",
680
        .blck_size                = 0,
681
        .type_size                = 0,
682
        .is_quantized             = false,
683
    },
684
    [GGML_TYPE_Q5_0] = {
685
        .type_name                = "q5_0",
686
        .blck_size                = QK5_0,
687
        .type_size                = sizeof(block_q5_0),
688
        .is_quantized             = true,
689
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_0,
690
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_0_ref,
691
    },
692
    [GGML_TYPE_Q5_1] = {
693
        .type_name                = "q5_1",
694
        .blck_size                = QK5_1,
695
        .type_size                = sizeof(block_q5_1),
696
        .is_quantized             = true,
697
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_1,
698
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_1_ref,
699
    },
700
    [GGML_TYPE_Q8_0] = {
701
        .type_name                = "q8_0",
702
        .blck_size                = QK8_0,
703
        .type_size                = sizeof(block_q8_0),
704
        .is_quantized             = true,
705
        .to_float                 = (ggml_to_float_t) dequantize_row_q8_0,
706
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_0_ref,
707
    },
708
    [GGML_TYPE_Q8_1] = {
709
        .type_name                = "q8_1",
710
        .blck_size                = QK8_1,
711
        .type_size                = sizeof(block_q8_1),
712
        .is_quantized             = true,
713
        .from_float_ref           = (ggml_from_float_t) quantize_row_q8_1_ref,
714
    },
715
    [GGML_TYPE_MXFP4] = {
716
        .type_name                = "mxfp4",
717
        .blck_size                = QK_MXFP4,
718
        .type_size                = sizeof(block_mxfp4),
719
        .is_quantized             = true,
720
        .to_float                 = (ggml_to_float_t) dequantize_row_mxfp4,
721
        .from_float_ref           = (ggml_from_float_t)quantize_row_mxfp4_ref,
722
    },
723
    [GGML_TYPE_Q2_K] = {
724
        .type_name                = "q2_K",
725
        .blck_size                = QK_K,
726
        .type_size                = sizeof(block_q2_K),
727
        .is_quantized             = true,
728
        .to_float                 = (ggml_to_float_t) dequantize_row_q2_K,
729
        .from_float_ref           = (ggml_from_float_t) quantize_row_q2_K_ref,
730
    },
731
    [GGML_TYPE_Q3_K] = {
732
        .type_name                = "q3_K",
733
        .blck_size                = QK_K,
734
        .type_size                = sizeof(block_q3_K),
735
        .is_quantized             = true,
736
        .to_float                 = (ggml_to_float_t) dequantize_row_q3_K,
737
        .from_float_ref           = (ggml_from_float_t) quantize_row_q3_K_ref,
738
    },
739
    [GGML_TYPE_Q4_K] = {
740
        .type_name                = "q4_K",
741
        .blck_size                = QK_K,
742
        .type_size                = sizeof(block_q4_K),
743
        .is_quantized             = true,
744
        .to_float                 = (ggml_to_float_t) dequantize_row_q4_K,
745
        .from_float_ref           = (ggml_from_float_t) quantize_row_q4_K_ref,
746
    },
747
    [GGML_TYPE_Q5_K] = {
748
        .type_name                = "q5_K",
749
        .blck_size                = QK_K,
750
        .type_size                = sizeof(block_q5_K),
751
        .is_quantized             = true,
752
        .to_float                 = (ggml_to_float_t) dequantize_row_q5_K,
753
        .from_float_ref           = (ggml_from_float_t) quantize_row_q5_K_ref,
754
    },
755
    [GGML_TYPE_Q6_K] = {
756
        .type_name                = "q6_K",
757
        .blck_size                = QK_K,
758
        .type_size                = sizeof(block_q6_K),
759
        .is_quantized             = true,
760
        .to_float                 = (ggml_to_float_t) dequantize_row_q6_K,
761
        .from_float_ref           = (ggml_from_float_t) quantize_row_q6_K_ref,
762
    },
763
    [GGML_TYPE_IQ2_XXS] = {
764
        .type_name                = "iq2_xxs",
765
        .blck_size                = QK_K,
766
        .type_size                = sizeof(block_iq2_xxs),
767
        .is_quantized             = true,
768
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xxs,
769
        .from_float_ref           = NULL,
770
    },
771
    [GGML_TYPE_IQ2_XS] = {
772
        .type_name                = "iq2_xs",
773
        .blck_size                = QK_K,
774
        .type_size                = sizeof(block_iq2_xs),
775
        .is_quantized             = true,
776
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_xs,
777
        .from_float_ref           = NULL,
778
    },
779
    [GGML_TYPE_IQ3_XXS] = {
780
        .type_name                = "iq3_xxs",
781
        .blck_size                = QK_K,
782
        .type_size                = sizeof(block_iq3_xxs),
783
        .is_quantized             = true,
784
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_xxs,
785
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_xxs_ref,
786
    },
787
    [GGML_TYPE_IQ3_S] = {
788
        .type_name                = "iq3_s",
789
        .blck_size                = QK_K,
790
        .type_size                = sizeof(block_iq3_s),
791
        .is_quantized             = true,
792
        .to_float                 = (ggml_to_float_t) dequantize_row_iq3_s,
793
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq3_s_ref,
794
    },
795
    [GGML_TYPE_IQ2_S] = {
796
        .type_name                = "iq2_s",
797
        .blck_size                = QK_K,
798
        .type_size                = sizeof(block_iq2_s),
799
        .is_quantized             = true,
800
        .to_float                 = (ggml_to_float_t) dequantize_row_iq2_s,
801
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq2_s_ref,
802
    },
803
    [GGML_TYPE_IQ1_S] = {
804
        .type_name                = "iq1_s",
805
        .blck_size                = QK_K,
806
        .type_size                = sizeof(block_iq1_s),
807
        .is_quantized             = true,
808
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_s,
809
        .from_float_ref           = NULL,
810
    },
811
    [GGML_TYPE_IQ1_M] = {
812
        .type_name                = "iq1_m",
813
        .blck_size                = QK_K,
814
        .type_size                = sizeof(block_iq1_m),
815
        .is_quantized             = true,
816
        .to_float                 = (ggml_to_float_t) dequantize_row_iq1_m,
817
        .from_float_ref           = NULL,
818
    },
819
    [GGML_TYPE_IQ4_NL] = {
820
        .type_name                = "iq4_nl",
821
        .blck_size                = QK4_NL,
822
        .type_size                = sizeof(block_iq4_nl),
823
        .is_quantized             = true,
824
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_nl,
825
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_nl_ref,
826
    },
827
    [GGML_TYPE_IQ4_XS] = {
828
        .type_name                = "iq4_xs",
829
        .blck_size                = QK_K,
830
        .type_size                = sizeof(block_iq4_xs),
831
        .is_quantized             = true,
832
        .to_float                 = (ggml_to_float_t) dequantize_row_iq4_xs,
833
        .from_float_ref           = (ggml_from_float_t)quantize_row_iq4_xs_ref,
834
    },
835
    [GGML_TYPE_Q8_K] = {
836
        .type_name                = "q8_K",
837
        .blck_size                = QK_K,
838
        .type_size                = sizeof(block_q8_K),
839
        .is_quantized             = true,
840
    },
841
    [GGML_TYPE_BF16] = {
842
        .type_name                = "bf16",
843
        .blck_size                = 1,
844
        .type_size                = sizeof(ggml_bf16_t),
845
        .is_quantized             = false,
846
        .to_float                 = (ggml_to_float_t) ggml_bf16_to_fp32_row,
847
        .from_float_ref           = (ggml_from_float_t) ggml_fp32_to_bf16_row_ref,
848
    },
849
    [31] = { // GGML_TYPE_Q4_0_4_4
850
        .type_name                = "TYPE_Q4_0_4_4 REMOVED, use Q4_0 with runtime repacking",
851
        .blck_size                = 0,
852
        .type_size                = 0,
853
        .is_quantized             = false,
854
    },
855
    [32] = { // GGML_TYPE_Q4_0_4_8
856
        .type_name                = "TYPE_Q4_0_4_8 REMOVED, use Q4_0 with runtime repacking",
857
        .blck_size                = 0,
858
        .type_size                = 0,
859
        .is_quantized             = false,
860
    },
861
    [33] = { // GGML_TYPE_Q4_0_8_8
862
        .type_name                = "TYPE_Q4_0_8_8 REMOVED, use Q4_0 with runtime repacking",
863
        .blck_size                = 0,
864
        .type_size                = 0,
865
        .is_quantized             = false,
866
    },
867
    [GGML_TYPE_TQ1_0] = {
868
        .type_name                = "tq1_0",
869
        .blck_size                = QK_K,
870
        .type_size                = sizeof(block_tq1_0),
871
        .is_quantized             = true,
872
        .to_float                 = (ggml_to_float_t) dequantize_row_tq1_0,
873
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq1_0_ref,
874
    },
875
    [GGML_TYPE_TQ2_0] = {
876
        .type_name                = "tq2_0",
877
        .blck_size                = QK_K,
878
        .type_size                = sizeof(block_tq2_0),
879
        .is_quantized             = true,
880
        .to_float                 = (ggml_to_float_t) dequantize_row_tq2_0,
881
        .from_float_ref           = (ggml_from_float_t) quantize_row_tq2_0_ref,
882
    },
883
    [36] = { // GGML_TYPE_IQ4_NL_4_4
884
        .type_name                = "TYPE_IQ4_NL_4_4 REMOVED, use IQ4_NL with runtime repacking",
885
        .blck_size                = 0,
886
        .type_size                = 0,
887
        .is_quantized             = false,
888
    },
889
    [37] = { // GGML_TYPE_IQ4_NL_4_8
890
        .type_name                = "TYPE_IQ4_NL_4_8 REMOVED, use IQ4_NL with runtime repacking",
891
        .blck_size                = 0,
892
        .type_size                = 0,
893
        .is_quantized             = false,
894
    },
895
    [38] = { // GGML_TYPE_IQ4_NL_8_8
896
        .type_name                = "TYPE_IQ4_NL_8_8 REMOVED, use IQ4_NL with runtime repacking",
897
        .blck_size                = 0,
898
        .type_size                = 0,
899
        .is_quantized             = false,
900
    },
901
};
902
903
0
const struct ggml_type_traits * ggml_get_type_traits(enum ggml_type type) {
904
0
    assert(type >= 0);
905
0
    assert(type < GGML_TYPE_COUNT);
906
0
    return &type_traits[type];
907
0
}
908
909
//
910
// ggml object
911
//
912
913
struct ggml_object {
914
    size_t offs;
915
    size_t size;
916
917
    struct ggml_object * next;
918
919
    enum ggml_object_type type;
920
921
    char padding[4];
922
};
923
924
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
925
926
//
927
// ggml context
928
//
929
930
struct ggml_context {
931
    size_t mem_size;
932
    void * mem_buffer;
933
    bool   mem_buffer_owned;
934
    bool   no_alloc;
935
936
    int    n_objects;
937
938
    struct ggml_object * objects_begin;
939
    struct ggml_object * objects_end;
940
};
941
942
//
943
// data types
944
//
945
946
static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
947
    "NONE",
948
949
    "DUP",
950
    "ADD",
951
    "ADD_ID",
952
    "ADD1",
953
    "ACC",
954
    "SUB",
955
    "MUL",
956
    "DIV",
957
    "SQR",
958
    "SQRT",
959
    "LOG",
960
    "SIN",
961
    "COS",
962
    "SUM",
963
    "SUM_ROWS",
964
    "CUMSUM",
965
    "MEAN",
966
    "ARGMAX",
967
    "COUNT_EQUAL",
968
    "REPEAT",
969
    "REPEAT_BACK",
970
    "CONCAT",
971
    "SILU_BACK",
972
    "NORM",
973
    "RMS_NORM",
974
    "RMS_NORM_BACK",
975
    "GROUP_NORM",
976
    "L2_NORM",
977
978
    "MUL_MAT",
979
    "MUL_MAT_ID",
980
    "OUT_PROD",
981
982
    "SCALE",
983
    "SET",
984
    "CPY",
985
    "CONT",
986
    "RESHAPE",
987
    "VIEW",
988
    "PERMUTE",
989
    "TRANSPOSE",
990
    "GET_ROWS",
991
    "GET_ROWS_BACK",
992
    "SET_ROWS",
993
    "DIAG",
994
    "DIAG_MASK_INF",
995
    "DIAG_MASK_ZERO",
996
    "SOFT_MAX",
997
    "SOFT_MAX_BACK",
998
    "ROPE",
999
    "ROPE_BACK",
1000
    "CLAMP",
1001
    "CONV_TRANSPOSE_1D",
1002
    "IM2COL",
1003
    "IM2COL_BACK",
1004
    "IM2COL_3D",
1005
    "CONV_2D",
1006
    "CONV_3D",
1007
    "CONV_2D_DW",
1008
    "CONV_TRANSPOSE_2D",
1009
    "POOL_1D",
1010
    "POOL_2D",
1011
    "POOL_2D_BACK",
1012
    "UPSCALE",
1013
    "PAD",
1014
    "PAD_REFLECT_1D",
1015
    "ROLL",
1016
    "ARANGE",
1017
    "TIMESTEP_EMBEDDING",
1018
    "ARGSORT",
1019
    "TOP_K",
1020
    "LEAKY_RELU",
1021
    "TRI",
1022
    "FILL",
1023
1024
    "FLASH_ATTN_EXT",
1025
    "FLASH_ATTN_BACK",
1026
    "SSM_CONV",
1027
    "SSM_SCAN",
1028
    "WIN_PART",
1029
    "WIN_UNPART",
1030
    "GET_REL_POS",
1031
    "ADD_REL_POS",
1032
    "RWKV_WKV6",
1033
    "GATED_LINEAR_ATTN",
1034
    "RWKV_WKV7",
1035
    "SOLVE_TRI",
1036
1037
    "UNARY",
1038
1039
    "MAP_CUSTOM1",
1040
    "MAP_CUSTOM2",
1041
    "MAP_CUSTOM3",
1042
1043
    "CUSTOM",
1044
1045
    "CROSS_ENTROPY_LOSS",
1046
    "CROSS_ENTROPY_LOSS_BACK",
1047
    "OPT_STEP_ADAMW",
1048
    "OPT_STEP_SGD",
1049
1050
    "GLU",
1051
};
1052
1053
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1054
1055
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
1056
    "none",
1057
1058
    "x",
1059
    "x+y",
1060
    "x[i]+y",
1061
    "x+y",
1062
    "view(x,nb,offset)+=y->x",
1063
    "x-y",
1064
    "x*y",
1065
    "x/y",
1066
    "x^2",
1067
    "√x",
1068
    "log(x)",
1069
    "sin(x)",
1070
    "cos(x)",
1071
    "Σx",
1072
    "Σx_k",
1073
    "cumsum(x)",
1074
    "Σx/n",
1075
    "argmax(x)",
1076
    "count_equal(x)",
1077
    "repeat(x)",
1078
    "repeat_back(x)",
1079
    "concat(x, y)",
1080
    "silu_back(x)",
1081
    "norm(x)",
1082
    "rms_norm(x)",
1083
    "rms_norm_back(x)",
1084
    "group_norm(x)",
1085
    "l2_norm(x)",
1086
1087
    "X*Y",
1088
    "X[i]*Y",
1089
    "X*Y",
1090
1091
    "x*v",
1092
    "y-\\>view(x)",
1093
    "x-\\>y",
1094
    "cont(x)",
1095
    "reshape(x)",
1096
    "view(x)",
1097
    "permute(x)",
1098
    "transpose(x)",
1099
    "get_rows(x)",
1100
    "get_rows_back(x)",
1101
    "set_rows(x)",
1102
    "diag(x)",
1103
    "diag_mask_inf(x)",
1104
    "diag_mask_zero(x)",
1105
    "soft_max(x)",
1106
    "soft_max_back(x)",
1107
    "rope(x)",
1108
    "rope_back(x)",
1109
    "clamp(x)",
1110
    "conv_transpose_1d(x)",
1111
    "im2col(x)",
1112
    "im2col_back(x)",
1113
    "im2col_3d(x)",
1114
    "conv_2d(x)",
1115
    "conv_3d(x)",
1116
    "conv_2d_dw(x)",
1117
    "conv_transpose_2d(x)",
1118
    "pool_1d(x)",
1119
    "pool_2d(x)",
1120
    "pool_2d_back(x)",
1121
    "upscale(x)",
1122
    "pad(x)",
1123
    "pad_reflect_1d(x)",
1124
    "roll(x)",
1125
    "arange(start, stop, step)",
1126
    "timestep_embedding(timesteps, dim, max_period)",
1127
    "argsort(x)",
1128
    "top_k(x)",
1129
    "leaky_relu(x)",
1130
    "tri(x)",
1131
    "fill(x, c)",
1132
1133
    "flash_attn_ext(x)",
1134
    "flash_attn_back(x)",
1135
    "ssm_conv(x)",
1136
    "ssm_scan(x)",
1137
    "win_part(x)",
1138
    "win_unpart(x)",
1139
    "get_rel_pos(x)",
1140
    "add_rel_pos(x)",
1141
    "rwkv_wkv6(k, v, r, tf, td, s)",
1142
    "gated_linear_attn(k, v, q, gate, s)",
1143
    "rwkv_wkv7(r, w, k, v, a, b, s)",
1144
    "A X = B, A triangular, solve X",
1145
1146
    "unary(x)",
1147
1148
    "map_custom(x)",
1149
    "map_custom(x,y)",
1150
    "map_custom(x,y,z)",
1151
1152
    "custom(x)",
1153
1154
    "cross_entropy_loss(x,y)",
1155
    "cross_entropy_loss_back(x,y)",
1156
    "adamw(x)",
1157
    "sgd(x)",
1158
1159
    "glu(x)",
1160
};
1161
1162
static_assert(GGML_OP_COUNT == 95, "GGML_OP_COUNT != 95");
1163
1164
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
1165
1166
static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = {
1167
    "ABS",
1168
    "SGN",
1169
    "NEG",
1170
    "STEP",
1171
    "TANH",
1172
    "ELU",
1173
    "RELU",
1174
    "SIGMOID",
1175
    "GELU",
1176
    "GELU_QUICK",
1177
    "SILU",
1178
    "HARDSWISH",
1179
    "HARDSIGMOID",
1180
    "EXP",
1181
    "EXPM1",
1182
    "SOFTPLUS",
1183
    "GELU_ERF",
1184
    "XIELU",
1185
    "FLOOR",
1186
    "CEIL",
1187
    "ROUND",
1188
    "TRUNC",
1189
};
1190
1191
static_assert(GGML_UNARY_OP_COUNT == 22, "GGML_UNARY_OP_COUNT != 22");
1192
1193
static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = {
1194
    "REGLU",
1195
    "GEGLU",
1196
    "SWIGLU",
1197
    "SWIGLU_OAI",
1198
    "GEGLU_ERF",
1199
    "GEGLU_QUICK",
1200
};
1201
1202
static_assert(GGML_GLU_OP_COUNT == 6, "GGML_GLU_OP_COUNT != 6");
1203
1204
1205
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
1206
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
1207
1208
1209
////////////////////////////////////////////////////////////////////////////////
1210
1211
0
void ggml_print_object(const struct ggml_object * obj) {
1212
0
    GGML_LOG_INFO(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
1213
0
            obj->type, obj->offs, obj->size, (const void *) obj->next);
1214
0
}
1215
1216
0
void ggml_print_objects(const struct ggml_context * ctx) {
1217
0
    struct ggml_object * obj = ctx->objects_begin;
1218
1219
0
    GGML_LOG_INFO("%s: objects in context %p:\n", __func__, (const void *) ctx);
1220
1221
0
    while (obj != NULL) {
1222
0
        ggml_print_object(obj);
1223
0
        obj = obj->next;
1224
0
    }
1225
1226
0
    GGML_LOG_INFO("%s: --- end ---\n", __func__);
1227
0
}
1228
1229
4.34k
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
1230
4.34k
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1231
1232
4.34k
    return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1233
4.34k
}
1234
1235
0
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
1236
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1237
1238
0
    return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
1239
0
}
1240
1241
6.39k
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
1242
31.5k
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1243
25.2k
        if (tensor->ne[i] <= 0) {
1244
107
            return 0;
1245
107
        }
1246
25.2k
    }
1247
1248
6.28k
    size_t nbytes;
1249
6.28k
    const size_t blck_size = ggml_blck_size(tensor->type);
1250
6.28k
    if (blck_size == 1) {
1251
6.17k
        nbytes = ggml_type_size(tensor->type);
1252
30.8k
        for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1253
24.7k
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1254
24.7k
        }
1255
6.17k
    }
1256
110
    else {
1257
110
        nbytes = tensor->ne[0]*tensor->nb[0]/blck_size;
1258
440
        for (int i = 1; i < GGML_MAX_DIMS; ++i) {
1259
330
            nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
1260
330
        }
1261
110
    }
1262
1263
6.28k
    return nbytes;
1264
6.39k
}
1265
1266
0
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
1267
0
    return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
1268
0
}
1269
1270
15.8k
int64_t ggml_blck_size(enum ggml_type type) {
1271
15.8k
    assert(type >= 0);
1272
15.8k
    assert(type < GGML_TYPE_COUNT);
1273
15.8k
    return type_traits[type].blck_size;
1274
15.8k
}
1275
1276
15.6k
size_t ggml_type_size(enum ggml_type type) {
1277
15.6k
    assert(type >= 0);
1278
15.6k
    assert(type < GGML_TYPE_COUNT);
1279
15.6k
    return type_traits[type].type_size;
1280
15.6k
}
1281
1282
1.83k
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
1283
1.83k
    assert(type >= 0);
1284
1.83k
    assert(type < GGML_TYPE_COUNT);
1285
1.83k
    assert(ne % ggml_blck_size(type) == 0);
1286
1.83k
    return ggml_type_size(type)*ne/ggml_blck_size(type);
1287
1.83k
}
1288
1289
281
const char * ggml_type_name(enum ggml_type type) {
1290
281
    assert(type >= 0);
1291
281
    assert(type < GGML_TYPE_COUNT);
1292
281
    return type_traits[type].type_name;
1293
281
}
1294
1295
0
bool ggml_is_quantized(enum ggml_type type) {
1296
0
    assert(type >= 0);
1297
0
    assert(type < GGML_TYPE_COUNT);
1298
0
    return type_traits[type].is_quantized;
1299
0
}
1300
1301
0
const char * ggml_op_name(enum ggml_op op) {
1302
0
    return GGML_OP_NAME[op];
1303
0
}
1304
1305
0
const char * ggml_op_symbol(enum ggml_op op) {
1306
0
    return GGML_OP_SYMBOL[op];
1307
0
}
1308
1309
0
const char * ggml_unary_op_name(enum ggml_unary_op op) {
1310
0
    return GGML_UNARY_OP_NAME[op];
1311
0
}
1312
1313
0
const char * ggml_glu_op_name(enum ggml_glu_op op) {
1314
0
    return GGML_GLU_OP_NAME[op];
1315
0
}
1316
1317
0
const char * ggml_op_desc(const struct ggml_tensor * t) {
1318
0
    if (t->op == GGML_OP_UNARY) {
1319
0
        enum ggml_unary_op uop = ggml_get_unary_op(t);
1320
0
        return ggml_unary_op_name(uop);
1321
0
    }
1322
0
    if (t->op == GGML_OP_GLU) {
1323
0
        enum ggml_glu_op gop = ggml_get_glu_op(t);
1324
0
        return ggml_glu_op_name(gop);
1325
0
    }
1326
0
    return ggml_op_name(t->op);
1327
0
}
1328
1329
0
size_t ggml_element_size(const struct ggml_tensor * tensor) {
1330
0
    return ggml_type_size(tensor->type);
1331
0
}
1332
1333
0
bool ggml_is_scalar(const struct ggml_tensor * tensor) {
1334
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1335
1336
0
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1337
0
}
1338
1339
0
bool ggml_is_vector(const struct ggml_tensor * tensor) {
1340
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1341
1342
0
    return tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
1343
0
}
1344
1345
0
bool ggml_is_matrix(const struct ggml_tensor * tensor) {
1346
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1347
1348
0
    return tensor->ne[2] == 1 && tensor->ne[3] == 1;
1349
0
}
1350
1351
0
bool ggml_is_3d(const struct ggml_tensor * tensor) {
1352
0
    return tensor->ne[3] == 1;
1353
0
}
1354
1355
0
int ggml_n_dims(const struct ggml_tensor * tensor) {
1356
0
    for (int i = GGML_MAX_DIMS - 1; i >= 1; --i) {
1357
0
        if (tensor->ne[i] > 1) {
1358
0
            return i + 1;
1359
0
        }
1360
0
    }
1361
0
    return 1;
1362
0
}
1363
1364
0
enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
1365
0
    enum ggml_type wtype = GGML_TYPE_COUNT;
1366
1367
0
    switch (ftype) {
1368
0
        case GGML_FTYPE_ALL_F32:              wtype = GGML_TYPE_F32;   break;
1369
0
        case GGML_FTYPE_MOSTLY_F16:           wtype = GGML_TYPE_F16;   break;
1370
0
        case GGML_FTYPE_MOSTLY_BF16:          wtype = GGML_TYPE_BF16;  break;
1371
0
        case GGML_FTYPE_MOSTLY_Q4_0:          wtype = GGML_TYPE_Q4_0;  break;
1372
0
        case GGML_FTYPE_MOSTLY_Q4_1:          wtype = GGML_TYPE_Q4_1;  break;
1373
0
        case GGML_FTYPE_MOSTLY_Q5_0:          wtype = GGML_TYPE_Q5_0;  break;
1374
0
        case GGML_FTYPE_MOSTLY_Q5_1:          wtype = GGML_TYPE_Q5_1;  break;
1375
0
        case GGML_FTYPE_MOSTLY_Q8_0:          wtype = GGML_TYPE_Q8_0;  break;
1376
0
        case GGML_FTYPE_MOSTLY_MXFP4:         wtype = GGML_TYPE_MXFP4; break;
1377
0
        case GGML_FTYPE_MOSTLY_Q2_K:          wtype = GGML_TYPE_Q2_K;  break;
1378
0
        case GGML_FTYPE_MOSTLY_Q3_K:          wtype = GGML_TYPE_Q3_K;  break;
1379
0
        case GGML_FTYPE_MOSTLY_Q4_K:          wtype = GGML_TYPE_Q4_K;  break;
1380
0
        case GGML_FTYPE_MOSTLY_Q5_K:          wtype = GGML_TYPE_Q5_K;  break;
1381
0
        case GGML_FTYPE_MOSTLY_Q6_K:          wtype = GGML_TYPE_Q6_K;  break;
1382
0
        case GGML_FTYPE_MOSTLY_IQ2_XXS:       wtype = GGML_TYPE_IQ2_XXS;  break;
1383
0
        case GGML_FTYPE_MOSTLY_IQ2_XS:        wtype = GGML_TYPE_IQ2_XS;   break;
1384
0
        case GGML_FTYPE_MOSTLY_IQ3_XXS:       wtype = GGML_TYPE_IQ3_XXS;  break;
1385
0
        case GGML_FTYPE_MOSTLY_IQ1_S:         wtype = GGML_TYPE_IQ1_S;    break;
1386
0
        case GGML_FTYPE_MOSTLY_IQ1_M:         wtype = GGML_TYPE_IQ1_M;    break;
1387
0
        case GGML_FTYPE_MOSTLY_IQ4_NL:        wtype = GGML_TYPE_IQ4_NL;   break;
1388
0
        case GGML_FTYPE_MOSTLY_IQ4_XS:        wtype = GGML_TYPE_IQ4_XS;   break;
1389
0
        case GGML_FTYPE_MOSTLY_IQ3_S:         wtype = GGML_TYPE_IQ3_S;    break;
1390
0
        case GGML_FTYPE_MOSTLY_IQ2_S:         wtype = GGML_TYPE_IQ2_S;    break;
1391
0
        case GGML_FTYPE_UNKNOWN:              wtype = GGML_TYPE_COUNT; break;
1392
0
        case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16: wtype = GGML_TYPE_COUNT; break;
1393
0
    }
1394
1395
0
    GGML_ASSERT(wtype != GGML_TYPE_COUNT);
1396
1397
0
    return wtype;
1398
0
}
1399
1400
1.40k
size_t ggml_tensor_overhead(void) {
1401
1.40k
    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
1402
1.40k
}
1403
1404
0
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
1405
0
    return tensor->nb[0] > tensor->nb[1];
1406
0
}
1407
1408
0
static bool ggml_is_contiguous_n(const struct ggml_tensor * tensor, int n) {
1409
0
    size_t next_nb = ggml_type_size(tensor->type);
1410
0
    if (tensor->ne[0] != ggml_blck_size(tensor->type) && tensor->nb[0] != next_nb) {
1411
0
        return false;
1412
0
    }
1413
0
    next_nb *= tensor->ne[0]/ggml_blck_size(tensor->type);
1414
0
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
1415
0
        if (i > n) {
1416
0
            if (tensor->ne[i] != 1 && tensor->nb[i] != next_nb) {
1417
0
                return false;
1418
0
            }
1419
0
            next_nb *= tensor->ne[i];
1420
0
        } else {
1421
            // this dimension does not need to be contiguous
1422
0
            next_nb = tensor->ne[i]*tensor->nb[i];
1423
0
        }
1424
0
    }
1425
0
    return true;
1426
0
}
1427
1428
0
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
1429
0
    return ggml_is_contiguous_0(tensor);
1430
0
}
1431
1432
0
bool ggml_is_contiguous_0(const struct ggml_tensor * tensor) {
1433
0
    return ggml_is_contiguous_n(tensor, 0);
1434
0
}
1435
1436
0
bool ggml_is_contiguous_1(const struct ggml_tensor * tensor) {
1437
0
    return ggml_is_contiguous_n(tensor, 1);
1438
0
}
1439
1440
0
bool ggml_is_contiguous_2(const struct ggml_tensor * tensor) {
1441
0
    return ggml_is_contiguous_n(tensor, 2);
1442
0
}
1443
1444
0
bool ggml_is_contiguously_allocated(const struct ggml_tensor * tensor) {
1445
0
    return ggml_nbytes(tensor) == ggml_nelements(tensor) * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type);
1446
0
}
1447
1448
0
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
1449
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1450
1451
0
    return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
1452
0
}
1453
1454
0
bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor) {
1455
0
    return
1456
0
        tensor->nb[0] > tensor->nb[2] &&
1457
0
        tensor->nb[1] > tensor->nb[0] &&
1458
0
        tensor->nb[2] == ggml_type_size(tensor->type);
1459
0
}
1460
1461
0
bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) {
1462
0
    return
1463
0
        tensor->ne[0] == ggml_blck_size(tensor->type) ||
1464
0
        tensor->nb[0] == ggml_type_size(tensor->type);
1465
0
}
1466
1467
0
static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
1468
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1469
1470
0
    return
1471
0
        tensor->nb[0] == ggml_type_size(tensor->type) &&
1472
0
        tensor->nb[2] == tensor->nb[1]*tensor->ne[1] &&
1473
0
        tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
1474
0
}
1475
1476
0
bool ggml_is_empty(const struct ggml_tensor * tensor) {
1477
0
    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
1478
0
        if (tensor->ne[i] == 0) {
1479
            // empty if any dimension has no elements
1480
0
            return true;
1481
0
        }
1482
0
    }
1483
0
    return false;
1484
0
}
1485
1486
0
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1487
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1488
1489
0
    return
1490
0
        (t0->ne[0] == t1->ne[0]) &&
1491
0
        (t0->ne[1] == t1->ne[1]) &&
1492
0
        (t0->ne[2] == t1->ne[2]) &&
1493
0
        (t0->ne[3] == t1->ne[3]);
1494
0
}
1495
1496
0
bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1497
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1498
1499
0
    return
1500
0
        (t0->nb[0] == t1->nb[0]) &&
1501
0
        (t0->nb[1] == t1->nb[1]) &&
1502
0
        (t0->nb[2] == t1->nb[2]) &&
1503
0
        (t0->nb[3] == t1->nb[3]);
1504
0
}
1505
1506
0
bool ggml_is_view(const struct ggml_tensor * t) {
1507
0
    return ggml_impl_is_view(t);
1508
0
}
1509
1510
// check if t1 can be represented as a repetition of t0
1511
0
bool ggml_can_repeat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1512
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1513
1514
0
    return ggml_is_empty(t0) ? ggml_is_empty(t1) :
1515
0
        (t1->ne[0]%t0->ne[0] == 0) &&
1516
0
        (t1->ne[1]%t0->ne[1] == 0) &&
1517
0
        (t1->ne[2]%t0->ne[2] == 0) &&
1518
0
        (t1->ne[3]%t0->ne[3] == 0);
1519
0
}
1520
1521
0
static inline bool ggml_can_repeat_rows(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
1522
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
1523
1524
0
    return (t0->ne[0] == t1->ne[0]) && ggml_can_repeat(t0, t1);
1525
0
}
1526
1527
// assert that pointer is aligned to GGML_MEM_ALIGN
1528
#define GGML_ASSERT_ALIGNED(ptr) \
1529
7.12k
    GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
1530
1531
////////////////////////////////////////////////////////////////////////////////
1532
1533
5.28k
struct ggml_context * ggml_init(struct ggml_init_params params) {
1534
5.28k
    bool is_first_call = true;
1535
1536
5.28k
    ggml_critical_section_start();
1537
1538
5.28k
    if (is_first_call) {
1539
        // initialize time system (required on Windows)
1540
5.28k
        ggml_time_init();
1541
1542
5.28k
        is_first_call = false;
1543
5.28k
    }
1544
1545
5.28k
    ggml_critical_section_end();
1546
1547
5.28k
    struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
1548
1549
    // allow to call ggml_init with 0 size
1550
5.28k
    if (params.mem_size == 0) {
1551
4.84k
        params.mem_size = GGML_MEM_ALIGN;
1552
4.84k
    }
1553
1554
5.28k
    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
1555
1556
5.28k
    *ctx = (struct ggml_context) {
1557
5.28k
        /*.mem_size           =*/ mem_size,
1558
5.28k
        /*.mem_buffer         =*/ params.mem_buffer ? params.mem_buffer : ggml_aligned_malloc(mem_size),
1559
5.28k
        /*.mem_buffer_owned   =*/ params.mem_buffer ? false : true,
1560
5.28k
        /*.no_alloc           =*/ params.no_alloc,
1561
5.28k
        /*.n_objects          =*/ 0,
1562
5.28k
        /*.objects_begin      =*/ NULL,
1563
5.28k
        /*.objects_end        =*/ NULL,
1564
5.28k
    };
1565
1566
5.28k
    GGML_ASSERT(ctx->mem_buffer != NULL);
1567
1568
5.28k
    GGML_ASSERT_ALIGNED(ctx->mem_buffer);
1569
1570
5.28k
    GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
1571
1572
5.28k
    return ctx;
1573
5.28k
}
1574
1575
0
void ggml_reset(struct ggml_context * ctx) {
1576
0
    if (ctx == NULL) {
1577
0
        return;
1578
0
    }
1579
1580
0
    ctx->n_objects     = 0;
1581
0
    ctx->objects_begin = NULL;
1582
0
    ctx->objects_end   = NULL;
1583
0
}
1584
1585
5.27k
void ggml_free(struct ggml_context * ctx) {
1586
5.27k
    if (ctx == NULL) {
1587
0
        return;
1588
0
    }
1589
1590
5.27k
    if (ctx->mem_buffer_owned) {
1591
5.27k
        ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
1592
5.27k
    }
1593
1594
5.27k
    GGML_FREE(ctx);
1595
5.27k
}
1596
1597
0
size_t ggml_used_mem(const struct ggml_context * ctx) {
1598
0
    return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
1599
0
}
1600
1601
0
bool ggml_get_no_alloc(struct ggml_context * ctx) {
1602
0
    return ctx->no_alloc;
1603
0
}
1604
1605
1.93k
void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
1606
1.93k
    ctx->no_alloc = no_alloc;
1607
1.93k
}
1608
1609
0
void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
1610
0
    return ctx->mem_buffer;
1611
0
}
1612
1613
0
size_t ggml_get_mem_size(const struct ggml_context * ctx) {
1614
0
    return ctx->mem_size;
1615
0
}
1616
1617
0
size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
1618
0
    size_t max_size = 0;
1619
1620
0
    for (struct ggml_tensor * tensor = ggml_get_first_tensor(ctx); tensor != NULL; tensor = ggml_get_next_tensor(ctx, tensor)) {
1621
0
        size_t bytes = ggml_nbytes(tensor);
1622
0
        max_size = MAX(max_size, bytes);
1623
0
    }
1624
1625
0
    return max_size;
1626
0
}
1627
1628
////////////////////////////////////////////////////////////////////////////////
1629
1630
1.83k
static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
1631
    // always insert objects at the end of the context's memory pool
1632
1.83k
    struct ggml_object * obj_cur = ctx->objects_end;
1633
1634
1.83k
    const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
1635
1.83k
    const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
1636
1.83k
    const size_t cur_end  = cur_offs + cur_size;
1637
1638
    // align to GGML_MEM_ALIGN
1639
1.83k
    GGML_ASSERT(size <= SIZE_MAX - (GGML_MEM_ALIGN - 1));
1640
1.83k
    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
1641
1642
1.83k
    char * const mem_buffer = ctx->mem_buffer;
1643
1.83k
    struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
1644
1645
    // integer overflow checks
1646
1.83k
    if (cur_end > SIZE_MAX - size_needed) {
1647
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu)\n", __func__, cur_end, size_needed);
1648
0
        return NULL;
1649
0
    }
1650
1.83k
    if (cur_end + size_needed > SIZE_MAX - GGML_OBJECT_SIZE) {
1651
0
        GGML_LOG_WARN("%s: overflow detected in cur_end (%zu) + size_needed (%zu) + GGML_OBJECT_SIZE (%zu)\n", __func__,
1652
0
                cur_end, size_needed, (size_t) GGML_OBJECT_SIZE);
1653
0
        return NULL;
1654
0
    }
1655
1656
1.83k
    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
1657
0
        GGML_LOG_WARN("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
1658
0
                __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
1659
#ifndef NDEBUG
1660
        GGML_ABORT("not enough space in the context's memory pool");
1661
#endif
1662
0
        return NULL;
1663
0
    }
1664
1665
1.83k
    *obj_new = (struct ggml_object) {
1666
1.83k
        .offs = cur_end + GGML_OBJECT_SIZE,
1667
1.83k
        .size = size_needed,
1668
1.83k
        .next = NULL,
1669
1.83k
        .type = type,
1670
1.83k
    };
1671
1672
1.83k
    GGML_ASSERT_ALIGNED(mem_buffer + obj_new->offs);
1673
1674
1.83k
    if (obj_cur != NULL) {
1675
1.40k
        obj_cur->next = obj_new;
1676
1.40k
    } else {
1677
        // this is the first object in this context
1678
436
        ctx->objects_begin = obj_new;
1679
436
    }
1680
1681
1.83k
    ctx->objects_end = obj_new;
1682
1683
    //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
1684
1685
1.83k
    return obj_new;
1686
1.83k
}
1687
1688
static struct ggml_tensor * ggml_new_tensor_impl(
1689
        struct ggml_context * ctx,
1690
        enum   ggml_type      type,
1691
        int                   n_dims,
1692
        const int64_t       * ne,
1693
        struct ggml_tensor  * view_src,
1694
1.83k
        size_t                view_offs) {
1695
1696
1.83k
    GGML_ASSERT(type >= 0 && type < GGML_TYPE_COUNT);
1697
1.83k
    GGML_ASSERT(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
1698
1699
    // find the base tensor and absolute offset
1700
1.83k
    if (view_src != NULL && view_src->view_src != NULL) {
1701
0
        view_offs += view_src->view_offs;
1702
0
        view_src   = view_src->view_src;
1703
0
    }
1704
1705
1.83k
    size_t data_size = ggml_row_size(type, ne[0]);
1706
7.35k
    for (int i = 1; i < n_dims; i++) {
1707
5.51k
        data_size *= ne[i];
1708
5.51k
    }
1709
1710
1.83k
    GGML_ASSERT(view_src == NULL || data_size == 0 || data_size + view_offs <= ggml_nbytes(view_src));
1711
1712
1.83k
    void * data = view_src != NULL ? view_src->data : NULL;
1713
1.83k
    if (data != NULL) {
1714
0
        data = (char *) data + view_offs;
1715
0
    }
1716
1717
1.83k
    size_t obj_alloc_size = 0;
1718
1719
1.83k
    if (view_src == NULL && !ctx->no_alloc) {
1720
        // allocate tensor data in the context's memory pool
1721
0
        obj_alloc_size = data_size;
1722
0
    }
1723
1724
1.83k
    GGML_ASSERT(GGML_TENSOR_SIZE <= SIZE_MAX - obj_alloc_size);
1725
1726
1.83k
    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TYPE_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
1727
1.83k
    GGML_ASSERT(obj_new);
1728
1729
1.83k
    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
1730
1731
1.83k
    *result = (struct ggml_tensor) {
1732
1.83k
        /*.type         =*/ type,
1733
1.83k
        /*.buffer       =*/ NULL,
1734
1.83k
        /*.ne           =*/ { 1, 1, 1, 1 },
1735
1.83k
        /*.nb           =*/ { 0, 0, 0, 0 },
1736
1.83k
        /*.op           =*/ GGML_OP_NONE,
1737
1.83k
        /*.op_params    =*/ { 0 },
1738
1.83k
        /*.flags        =*/ 0,
1739
1.83k
        /*.src          =*/ { NULL },
1740
1.83k
        /*.view_src     =*/ view_src,
1741
1.83k
        /*.view_offs    =*/ view_offs,
1742
1.83k
        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
1743
1.83k
        /*.name         =*/ { 0 },
1744
1.83k
        /*.extra        =*/ NULL,
1745
1.83k
        /*.padding      =*/ { 0 },
1746
1.83k
    };
1747
1748
    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
1749
    //GGML_ASSERT_ALIGNED(result->data);
1750
1751
9.19k
    for (int i = 0; i < n_dims; i++) {
1752
7.35k
        result->ne[i] = ne[i];
1753
7.35k
    }
1754
1755
1.83k
    result->nb[0] = ggml_type_size(type);
1756
1.83k
    result->nb[1] = result->nb[0]*(result->ne[0]/ggml_blck_size(type));
1757
5.51k
    for (int i = 2; i < GGML_MAX_DIMS; i++) {
1758
3.67k
        result->nb[i] = result->nb[i - 1]*result->ne[i - 1];
1759
3.67k
    }
1760
1761
1.83k
    ctx->n_objects++;
1762
1763
1.83k
    return result;
1764
1.83k
}
1765
1766
struct ggml_tensor * ggml_new_tensor(
1767
        struct ggml_context * ctx,
1768
        enum   ggml_type      type,
1769
        int                   n_dims,
1770
1.83k
        const int64_t       * ne) {
1771
1.83k
    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
1772
1.83k
}
1773
1774
struct ggml_tensor * ggml_new_tensor_1d(
1775
        struct ggml_context * ctx,
1776
        enum   ggml_type      type,
1777
0
        int64_t ne0) {
1778
0
    return ggml_new_tensor(ctx, type, 1, &ne0);
1779
0
}
1780
1781
struct ggml_tensor * ggml_new_tensor_2d(
1782
        struct ggml_context * ctx,
1783
        enum   ggml_type      type,
1784
        int64_t ne0,
1785
0
        int64_t ne1) {
1786
0
    const int64_t ne[2] = { ne0, ne1 };
1787
0
    return ggml_new_tensor(ctx, type, 2, ne);
1788
0
}
1789
1790
struct ggml_tensor * ggml_new_tensor_3d(
1791
        struct ggml_context * ctx,
1792
        enum   ggml_type      type,
1793
        int64_t ne0,
1794
        int64_t ne1,
1795
0
        int64_t ne2) {
1796
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
1797
0
    return ggml_new_tensor(ctx, type, 3, ne);
1798
0
}
1799
1800
struct ggml_tensor * ggml_new_tensor_4d(
1801
        struct ggml_context * ctx,
1802
        enum   ggml_type type,
1803
        int64_t ne0,
1804
        int64_t ne1,
1805
        int64_t ne2,
1806
0
        int64_t ne3) {
1807
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
1808
0
    return ggml_new_tensor(ctx, type, 4, ne);
1809
0
}
1810
1811
0
void * ggml_new_buffer(struct ggml_context * ctx, size_t nbytes) {
1812
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, nbytes);
1813
1814
0
    return (uint8_t *)ctx->mem_buffer + obj->offs;
1815
0
}
1816
1817
0
struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
1818
0
    return ggml_new_tensor(ctx, src->type, GGML_MAX_DIMS, src->ne);
1819
0
}
1820
1821
0
void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3) {
1822
0
    const int64_t ne2 = tensor->ne[2];
1823
0
    const int64_t ne1 = tensor->ne[1];
1824
0
    const int64_t ne0 = tensor->ne[0];
1825
1826
0
    const int64_t i3_ = (i/(ne2*ne1*ne0));
1827
0
    const int64_t i2_ = (i - i3_*ne2*ne1*ne0)/(ne1*ne0);
1828
0
    const int64_t i1_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0)/ne0;
1829
0
    const int64_t i0_ = (i - i3_*ne2*ne1*ne0 - i2_*ne1*ne0 - i1_*ne0);
1830
1831
0
    if (i0) {
1832
0
        * i0 = i0_;
1833
0
    }
1834
0
    if (i1) {
1835
0
        * i1 = i1_;
1836
0
    }
1837
0
    if (i2) {
1838
0
        * i2 = i2_;
1839
0
    }
1840
0
    if (i3) {
1841
0
        * i3 = i3_;
1842
0
    }
1843
0
}
1844
1845
0
void * ggml_get_data(const struct ggml_tensor * tensor) {
1846
0
    return tensor->data;
1847
0
}
1848
1849
0
float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
1850
0
    assert(tensor->type == GGML_TYPE_F32);
1851
0
    return (float *)(tensor->data);
1852
0
}
1853
1854
0
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
1855
0
    GGML_ASSERT(tensor->op == GGML_OP_UNARY);
1856
0
    return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
1857
0
}
1858
1859
0
enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) {
1860
0
    GGML_ASSERT(tensor->op == GGML_OP_GLU);
1861
0
    return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0);
1862
0
}
1863
1864
1.70k
const char * ggml_get_name(const struct ggml_tensor * tensor) {
1865
1.70k
    return tensor->name;
1866
1.70k
}
1867
1868
5.43k
struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name) {
1869
5.43k
    size_t i;
1870
37.9k
    for (i = 0; i < sizeof(tensor->name) - 1 && name[i] != '\0'; i++) {
1871
32.5k
        tensor->name[i] = name[i];
1872
32.5k
    }
1873
5.43k
    tensor->name[i] = '\0';
1874
5.43k
    return tensor;
1875
5.43k
}
1876
1877
0
struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...) {
1878
0
    va_list args;
1879
0
    va_start(args, fmt);
1880
0
    vsnprintf(tensor->name, sizeof(tensor->name), fmt, args);
1881
0
    va_end(args);
1882
0
    return tensor;
1883
0
}
1884
1885
struct ggml_tensor * ggml_view_tensor(
1886
        struct ggml_context * ctx,
1887
0
        struct ggml_tensor  * src) {
1888
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, GGML_MAX_DIMS, src->ne, src, 0);
1889
0
    ggml_format_name(result, "%s (view)", src->name);
1890
1891
0
    for (int i = 0; i < GGML_MAX_DIMS; i++) {
1892
0
        result->nb[i] = src->nb[i];
1893
0
    }
1894
1895
0
    return result;
1896
0
}
1897
1898
960
struct ggml_tensor * ggml_get_first_tensor(const struct ggml_context * ctx) {
1899
960
    struct ggml_object * obj = ctx->objects_begin;
1900
1901
960
    char * const mem_buffer = ctx->mem_buffer;
1902
1903
960
    while (obj != NULL) {
1904
436
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1905
436
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1906
436
        }
1907
1908
0
        obj = obj->next;
1909
0
    }
1910
1911
524
    return NULL;
1912
960
}
1913
1914
1.16k
struct ggml_tensor * ggml_get_next_tensor(const struct ggml_context * ctx, struct ggml_tensor * tensor) {
1915
1.16k
    struct ggml_object * obj = (struct ggml_object *) ((char *)tensor - GGML_OBJECT_SIZE);
1916
1.16k
    obj = obj->next;
1917
1918
1.16k
    char * const mem_buffer = ctx->mem_buffer;
1919
1920
1.16k
    while (obj != NULL) {
1921
1.00k
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1922
1.00k
            return (struct ggml_tensor *)(mem_buffer + obj->offs);
1923
1.00k
        }
1924
1925
0
        obj = obj->next;
1926
0
    }
1927
1928
166
    return NULL;
1929
1.16k
}
1930
1931
0
struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
1932
0
    struct ggml_object * obj = ctx->objects_begin;
1933
1934
0
    char * const mem_buffer = ctx->mem_buffer;
1935
1936
0
    while (obj != NULL) {
1937
0
        if (obj->type == GGML_OBJECT_TYPE_TENSOR) {
1938
0
            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
1939
0
            if (strcmp(cur->name, name) == 0) {
1940
0
                return cur;
1941
0
            }
1942
0
        }
1943
1944
0
        obj = obj->next;
1945
0
    }
1946
1947
0
    return NULL;
1948
0
}
1949
1950
////////////////////////////////////////////////////////////////////////////////
1951
1952
// ggml_dup
1953
1954
static struct ggml_tensor * ggml_dup_impl(
1955
        struct ggml_context * ctx,
1956
        struct ggml_tensor  * a,
1957
0
        bool                  inplace) {
1958
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1959
1960
0
    result->op     = GGML_OP_DUP;
1961
0
    result->src[0] = a;
1962
1963
0
    return result;
1964
0
}
1965
1966
struct ggml_tensor * ggml_dup(
1967
        struct ggml_context * ctx,
1968
0
        struct ggml_tensor  * a) {
1969
0
    return ggml_dup_impl(ctx, a, false);
1970
0
}
1971
1972
struct ggml_tensor * ggml_dup_inplace(
1973
        struct ggml_context * ctx,
1974
0
        struct ggml_tensor  * a) {
1975
0
    return ggml_dup_impl(ctx, a, true);
1976
0
}
1977
1978
// ggml_add
1979
1980
static struct ggml_tensor * ggml_add_impl(
1981
        struct ggml_context * ctx,
1982
        struct ggml_tensor  * a,
1983
        struct ggml_tensor  * b,
1984
0
        bool                  inplace) {
1985
0
    GGML_ASSERT(ggml_can_repeat(b, a));
1986
1987
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
1988
1989
0
    result->op     = GGML_OP_ADD;
1990
0
    result->src[0] = a;
1991
0
    result->src[1] = b;
1992
1993
0
    return result;
1994
0
}
1995
1996
struct ggml_tensor * ggml_add(
1997
        struct ggml_context * ctx,
1998
        struct ggml_tensor  * a,
1999
0
        struct ggml_tensor  * b) {
2000
0
    return ggml_add_impl(ctx, a, b, false);
2001
0
}
2002
2003
struct ggml_tensor * ggml_add_inplace(
2004
        struct ggml_context * ctx,
2005
        struct ggml_tensor  * a,
2006
0
        struct ggml_tensor  * b) {
2007
0
    return ggml_add_impl(ctx, a, b, true);
2008
0
}
2009
2010
// ggml_add_cast
2011
2012
static struct ggml_tensor * ggml_add_cast_impl(
2013
        struct ggml_context * ctx,
2014
        struct ggml_tensor  * a,
2015
        struct ggml_tensor  * b,
2016
0
        enum   ggml_type      type) {
2017
    // TODO: support less-strict constraint
2018
    //       GGML_ASSERT(ggml_can_repeat(b, a));
2019
0
    GGML_ASSERT(ggml_can_repeat_rows(b, a));
2020
2021
    // currently only supported for quantized input and f16
2022
0
    GGML_ASSERT(ggml_is_quantized(a->type) ||
2023
0
                a->type == GGML_TYPE_F16 ||
2024
0
                a->type == GGML_TYPE_BF16);
2025
2026
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
2027
2028
0
    result->op     = GGML_OP_ADD;
2029
0
    result->src[0] = a;
2030
0
    result->src[1] = b;
2031
2032
0
    return result;
2033
0
}
2034
2035
struct ggml_tensor * ggml_add_cast(
2036
        struct ggml_context * ctx,
2037
        struct ggml_tensor  * a,
2038
        struct ggml_tensor  * b,
2039
0
        enum   ggml_type      type) {
2040
0
    return ggml_add_cast_impl(ctx, a, b, type);
2041
0
}
2042
2043
struct ggml_tensor * ggml_add_id(
2044
            struct ggml_context * ctx,
2045
            struct ggml_tensor  * a,
2046
            struct ggml_tensor  * b,
2047
0
            struct ggml_tensor  * ids) {
2048
2049
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
2050
0
    GGML_ASSERT(a->ne[1] == ids->ne[0]);
2051
0
    GGML_ASSERT(a->ne[2] == ids->ne[1]);
2052
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
2053
2054
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2055
2056
0
    result->op     = GGML_OP_ADD_ID;
2057
0
    result->src[0] = a;
2058
0
    result->src[1] = b;
2059
0
    result->src[2] = ids;
2060
2061
0
    return result;
2062
0
}
2063
2064
// ggml_add1
2065
2066
static struct ggml_tensor * ggml_add1_impl(
2067
        struct ggml_context * ctx,
2068
        struct ggml_tensor  * a,
2069
        struct ggml_tensor  * b,
2070
0
        bool                  inplace) {
2071
0
    GGML_ASSERT(ggml_is_scalar(b));
2072
0
    GGML_ASSERT(ggml_is_padded_1d(a));
2073
2074
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2075
2076
0
    result->op     = GGML_OP_ADD1;
2077
0
    result->src[0] = a;
2078
0
    result->src[1] = b;
2079
2080
0
    return result;
2081
0
}
2082
2083
struct ggml_tensor * ggml_add1(
2084
        struct ggml_context * ctx,
2085
        struct ggml_tensor  * a,
2086
0
        struct ggml_tensor  * b) {
2087
0
    return ggml_add1_impl(ctx, a, b, false);
2088
0
}
2089
2090
struct ggml_tensor * ggml_add1_inplace(
2091
        struct ggml_context * ctx,
2092
        struct ggml_tensor  * a,
2093
0
        struct ggml_tensor  * b) {
2094
0
    return ggml_add1_impl(ctx, a, b, true);
2095
0
}
2096
2097
// ggml_acc
2098
2099
static struct ggml_tensor * ggml_acc_impl(
2100
        struct ggml_context * ctx,
2101
        struct ggml_tensor  * a,
2102
        struct ggml_tensor  * b,
2103
        size_t                nb1,
2104
        size_t                nb2,
2105
        size_t                nb3,
2106
        size_t                offset,
2107
0
        bool                  inplace) {
2108
0
    GGML_ASSERT(ggml_nelements(b) <= ggml_nelements(a));
2109
0
    GGML_ASSERT(ggml_is_contiguous(a));
2110
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2111
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
2112
2113
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2114
2115
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
2116
0
    ggml_set_op_params(result, params, sizeof(params));
2117
2118
0
    result->op     = GGML_OP_ACC;
2119
0
    result->src[0] = a;
2120
0
    result->src[1] = b;
2121
2122
0
    return result;
2123
0
}
2124
2125
struct ggml_tensor * ggml_acc(
2126
        struct ggml_context * ctx,
2127
        struct ggml_tensor  * a,
2128
        struct ggml_tensor  * b,
2129
        size_t                nb1,
2130
        size_t                nb2,
2131
        size_t                nb3,
2132
0
        size_t                offset) {
2133
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
2134
0
}
2135
2136
struct ggml_tensor * ggml_acc_inplace(
2137
        struct ggml_context * ctx,
2138
        struct ggml_tensor  * a,
2139
        struct ggml_tensor  * b,
2140
        size_t                nb1,
2141
        size_t                nb2,
2142
        size_t                nb3,
2143
0
        size_t                offset) {
2144
0
    return ggml_acc_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
2145
0
}
2146
2147
// ggml_sub
2148
2149
static struct ggml_tensor * ggml_sub_impl(
2150
        struct ggml_context * ctx,
2151
        struct ggml_tensor  * a,
2152
        struct ggml_tensor  * b,
2153
0
        bool                  inplace) {
2154
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2155
2156
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2157
2158
0
    result->op     = GGML_OP_SUB;
2159
0
    result->src[0] = a;
2160
0
    result->src[1] = b;
2161
2162
0
    return result;
2163
0
}
2164
2165
struct ggml_tensor * ggml_sub(
2166
        struct ggml_context * ctx,
2167
        struct ggml_tensor  * a,
2168
0
        struct ggml_tensor  * b) {
2169
0
    return ggml_sub_impl(ctx, a, b, false);
2170
0
}
2171
2172
struct ggml_tensor * ggml_sub_inplace(
2173
        struct ggml_context * ctx,
2174
        struct ggml_tensor  * a,
2175
0
        struct ggml_tensor  * b) {
2176
0
    return ggml_sub_impl(ctx, a, b, true);
2177
0
}
2178
2179
// ggml_mul
2180
2181
static struct ggml_tensor * ggml_mul_impl(
2182
        struct ggml_context * ctx,
2183
        struct ggml_tensor  * a,
2184
        struct ggml_tensor  * b,
2185
0
        bool                  inplace) {
2186
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2187
2188
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2189
2190
0
    result->op     = GGML_OP_MUL;
2191
0
    result->src[0] = a;
2192
0
    result->src[1] = b;
2193
2194
0
    return result;
2195
0
}
2196
2197
struct ggml_tensor * ggml_mul(
2198
        struct ggml_context * ctx,
2199
        struct ggml_tensor  * a,
2200
0
        struct ggml_tensor  * b) {
2201
0
    return ggml_mul_impl(ctx, a, b, false);
2202
0
}
2203
2204
struct ggml_tensor * ggml_mul_inplace(
2205
        struct ggml_context * ctx,
2206
        struct ggml_tensor  * a,
2207
0
        struct ggml_tensor  * b) {
2208
0
    return ggml_mul_impl(ctx, a, b, true);
2209
0
}
2210
2211
// ggml_div
2212
2213
static struct ggml_tensor * ggml_div_impl(
2214
        struct ggml_context * ctx,
2215
        struct ggml_tensor  * a,
2216
        struct ggml_tensor  * b,
2217
0
        bool                  inplace) {
2218
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2219
2220
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2221
2222
0
    result->op     = GGML_OP_DIV;
2223
0
    result->src[0] = a;
2224
0
    result->src[1] = b;
2225
2226
0
    return result;
2227
0
}
2228
2229
struct ggml_tensor * ggml_div(
2230
        struct ggml_context * ctx,
2231
        struct ggml_tensor  * a,
2232
0
        struct ggml_tensor  * b) {
2233
0
    return ggml_div_impl(ctx, a, b, false);
2234
0
}
2235
2236
struct ggml_tensor * ggml_div_inplace(
2237
        struct ggml_context * ctx,
2238
        struct ggml_tensor  * a,
2239
0
        struct ggml_tensor  * b) {
2240
0
    return ggml_div_impl(ctx, a, b, true);
2241
0
}
2242
2243
// ggml_sqr
2244
2245
static struct ggml_tensor * ggml_sqr_impl(
2246
        struct ggml_context * ctx,
2247
        struct ggml_tensor  * a,
2248
0
        bool                  inplace) {
2249
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2250
2251
0
    result->op     = GGML_OP_SQR;
2252
0
    result->src[0] = a;
2253
2254
0
    return result;
2255
0
}
2256
2257
struct ggml_tensor * ggml_sqr(
2258
        struct ggml_context * ctx,
2259
0
        struct ggml_tensor  * a) {
2260
0
    return ggml_sqr_impl(ctx, a, false);
2261
0
}
2262
2263
struct ggml_tensor * ggml_sqr_inplace(
2264
        struct ggml_context * ctx,
2265
0
        struct ggml_tensor  * a) {
2266
0
    return ggml_sqr_impl(ctx, a, true);
2267
0
}
2268
2269
// ggml_sqrt
2270
2271
static struct ggml_tensor * ggml_sqrt_impl(
2272
        struct ggml_context * ctx,
2273
        struct ggml_tensor  * a,
2274
0
        bool                  inplace) {
2275
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2276
2277
0
    result->op     = GGML_OP_SQRT;
2278
0
    result->src[0] = a;
2279
2280
0
    return result;
2281
0
}
2282
2283
struct ggml_tensor * ggml_sqrt(
2284
        struct ggml_context * ctx,
2285
0
        struct ggml_tensor  * a) {
2286
0
    return ggml_sqrt_impl(ctx, a, false);
2287
0
}
2288
2289
struct ggml_tensor * ggml_sqrt_inplace(
2290
        struct ggml_context * ctx,
2291
0
        struct ggml_tensor  * a) {
2292
0
    return ggml_sqrt_impl(ctx, a, true);
2293
0
}
2294
2295
// ggml_log
2296
2297
static struct ggml_tensor * ggml_log_impl(
2298
        struct ggml_context * ctx,
2299
        struct ggml_tensor  * a,
2300
0
        bool                  inplace) {
2301
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2302
2303
0
    result->op     = GGML_OP_LOG;
2304
0
    result->src[0] = a;
2305
2306
0
    return result;
2307
0
}
2308
2309
struct ggml_tensor * ggml_log(
2310
        struct ggml_context * ctx,
2311
0
        struct ggml_tensor  * a) {
2312
0
    return ggml_log_impl(ctx, a, false);
2313
0
}
2314
2315
struct ggml_tensor * ggml_log_inplace(
2316
        struct ggml_context * ctx,
2317
0
        struct ggml_tensor  * a) {
2318
0
    return ggml_log_impl(ctx, a, true);
2319
0
}
2320
2321
struct ggml_tensor * ggml_expm1(
2322
        struct ggml_context * ctx,
2323
0
        struct ggml_tensor  * a) {
2324
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXPM1);
2325
0
}
2326
2327
struct ggml_tensor * ggml_expm1_inplace(
2328
        struct ggml_context * ctx,
2329
0
        struct ggml_tensor  * a) {
2330
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXPM1);
2331
0
}
2332
2333
struct ggml_tensor * ggml_softplus(
2334
        struct ggml_context * ctx,
2335
0
        struct ggml_tensor  * a) {
2336
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2337
0
}
2338
2339
struct ggml_tensor * ggml_softplus_inplace(
2340
        struct ggml_context * ctx,
2341
0
        struct ggml_tensor  * a) {
2342
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SOFTPLUS);
2343
0
}
2344
2345
// ggml_sin
2346
2347
static struct ggml_tensor * ggml_sin_impl(
2348
        struct ggml_context * ctx,
2349
        struct ggml_tensor  * a,
2350
0
        bool                  inplace) {
2351
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2352
2353
0
    result->op     = GGML_OP_SIN;
2354
0
    result->src[0] = a;
2355
2356
0
    return result;
2357
0
}
2358
2359
struct ggml_tensor * ggml_sin(
2360
        struct ggml_context * ctx,
2361
0
        struct ggml_tensor  * a) {
2362
0
    return ggml_sin_impl(ctx, a, false);
2363
0
}
2364
2365
struct ggml_tensor * ggml_sin_inplace(
2366
        struct ggml_context * ctx,
2367
0
        struct ggml_tensor  * a) {
2368
0
    return ggml_sin_impl(ctx, a, true);
2369
0
}
2370
2371
// ggml_cos
2372
2373
static struct ggml_tensor * ggml_cos_impl(
2374
        struct ggml_context * ctx,
2375
        struct ggml_tensor  * a,
2376
0
        bool                  inplace) {
2377
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2378
2379
0
    result->op     = GGML_OP_COS;
2380
0
    result->src[0] = a;
2381
2382
0
    return result;
2383
0
}
2384
2385
struct ggml_tensor * ggml_cos(
2386
        struct ggml_context * ctx,
2387
0
        struct ggml_tensor  * a) {
2388
0
    return ggml_cos_impl(ctx, a, false);
2389
0
}
2390
2391
struct ggml_tensor * ggml_cos_inplace(
2392
        struct ggml_context * ctx,
2393
0
        struct ggml_tensor  * a) {
2394
0
    return ggml_cos_impl(ctx, a, true);
2395
0
}
2396
2397
// ggml_sum
2398
2399
struct ggml_tensor * ggml_sum(
2400
        struct ggml_context * ctx,
2401
0
        struct ggml_tensor  * a) {
2402
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
2403
2404
0
    result->op     = GGML_OP_SUM;
2405
0
    result->src[0] = a;
2406
2407
0
    return result;
2408
0
}
2409
2410
// ggml_sum_rows
2411
2412
struct ggml_tensor * ggml_sum_rows(
2413
        struct ggml_context * ctx,
2414
0
        struct ggml_tensor  * a) {
2415
0
    int64_t ne[GGML_MAX_DIMS] = { 1 };
2416
0
    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
2417
0
        ne[i] = a->ne[i];
2418
0
    }
2419
2420
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2421
2422
0
    result->op     = GGML_OP_SUM_ROWS;
2423
0
    result->src[0] = a;
2424
2425
0
    return result;
2426
0
}
2427
2428
// ggml_cumsum
2429
2430
struct ggml_tensor * ggml_cumsum(
2431
        struct ggml_context * ctx,
2432
0
        struct ggml_tensor  * a) {
2433
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
2434
2435
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2436
2437
0
    result->op     = GGML_OP_CUMSUM;
2438
0
    result->src[0] = a;
2439
2440
0
    return result;
2441
0
}
2442
2443
// ggml_mean
2444
2445
struct ggml_tensor * ggml_mean(
2446
        struct ggml_context * ctx,
2447
0
        struct ggml_tensor  * a) {
2448
0
    int64_t ne[4] = { 1, a->ne[1], a->ne[2], a->ne[3] };
2449
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
2450
2451
0
    result->op     = GGML_OP_MEAN;
2452
0
    result->src[0] = a;
2453
2454
0
    return result;
2455
0
}
2456
2457
// ggml_argmax
2458
2459
struct ggml_tensor * ggml_argmax(
2460
        struct ggml_context * ctx,
2461
0
        struct ggml_tensor  * a) {
2462
0
    GGML_ASSERT(ggml_is_matrix(a));
2463
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
2464
2465
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, a->ne[1]);
2466
2467
0
    result->op     = GGML_OP_ARGMAX;
2468
0
    result->src[0] = a;
2469
2470
0
    return result;
2471
0
}
2472
2473
// ggml_count_equal
2474
2475
struct ggml_tensor * ggml_count_equal(
2476
        struct ggml_context * ctx,
2477
        struct ggml_tensor  * a,
2478
0
        struct ggml_tensor  * b) {
2479
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
2480
2481
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, 1);
2482
2483
0
    result->op     = GGML_OP_COUNT_EQUAL;
2484
0
    result->src[0] = a;
2485
0
    result->src[1] = b;
2486
2487
0
    return result;
2488
0
}
2489
2490
// ggml_repeat
2491
2492
struct ggml_tensor * ggml_repeat(
2493
        struct ggml_context * ctx,
2494
        struct ggml_tensor  * a,
2495
0
        struct ggml_tensor  * b) {
2496
0
    GGML_ASSERT(ggml_can_repeat(a, b));
2497
2498
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2499
2500
0
    result->op     = GGML_OP_REPEAT;
2501
0
    result->src[0] = a;
2502
2503
0
    return result;
2504
0
}
2505
2506
struct ggml_tensor * ggml_repeat_4d(
2507
        struct ggml_context * ctx,
2508
        struct ggml_tensor * a,
2509
0
        int64_t ne0, int64_t ne1, int64_t ne2, int64_t ne3) {
2510
0
    const bool can_repeat = ggml_is_empty(a) || (
2511
0
        (ne0 % a->ne[0] == 0) &&
2512
0
        (ne1 % a->ne[1] == 0) &&
2513
0
        (ne2 % a->ne[2] == 0) &&
2514
0
        (ne3 % a->ne[3] == 0)
2515
0
    );
2516
0
    GGML_ASSERT(can_repeat);
2517
2518
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
2519
2520
0
    result->op     = GGML_OP_REPEAT;
2521
0
    result->src[0] = a;
2522
2523
0
    return result;
2524
0
}
2525
2526
// ggml_repeat_back
2527
2528
struct ggml_tensor * ggml_repeat_back(
2529
        struct ggml_context * ctx,
2530
        struct ggml_tensor  * a,
2531
0
        struct ggml_tensor  * b) {
2532
0
    GGML_ASSERT(ggml_can_repeat(b, a));
2533
2534
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, b->ne);
2535
2536
0
    result->op     = GGML_OP_REPEAT_BACK;
2537
0
    result->src[0] = a;
2538
2539
0
    return result;
2540
0
}
2541
2542
// ggml_concat
2543
2544
struct ggml_tensor * ggml_concat(
2545
    struct ggml_context * ctx,
2546
    struct ggml_tensor  * a,
2547
    struct ggml_tensor  * b,
2548
0
    int                   dim) {
2549
0
    GGML_ASSERT(dim >= 0 && dim < GGML_MAX_DIMS);
2550
0
    GGML_ASSERT(a->type == b->type);
2551
2552
0
    int64_t ne[GGML_MAX_DIMS];
2553
0
    for (int d = 0; d < GGML_MAX_DIMS; ++d) {
2554
0
        if (d == dim) {
2555
0
            ne[d] = a->ne[d] + b->ne[d];
2556
0
            continue;
2557
0
        }
2558
0
        GGML_ASSERT(a->ne[d] == b->ne[d]);
2559
0
        ne[d] = a->ne[d];
2560
0
    }
2561
2562
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, GGML_MAX_DIMS, ne);
2563
2564
0
    ggml_set_op_params_i32(result, 0, dim);
2565
2566
0
    result->op     = GGML_OP_CONCAT;
2567
0
    result->src[0] = a;
2568
0
    result->src[1] = b;
2569
2570
0
    return result;
2571
0
}
2572
2573
// ggml_abs
2574
2575
struct ggml_tensor * ggml_abs(
2576
        struct ggml_context * ctx,
2577
0
        struct ggml_tensor  * a) {
2578
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ABS);
2579
0
}
2580
2581
struct ggml_tensor * ggml_abs_inplace(
2582
        struct ggml_context * ctx,
2583
0
        struct ggml_tensor  * a) {
2584
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ABS);
2585
0
}
2586
2587
// ggml_sgn
2588
2589
struct ggml_tensor * ggml_sgn(
2590
        struct ggml_context * ctx,
2591
0
        struct ggml_tensor  * a) {
2592
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SGN);
2593
0
}
2594
2595
struct ggml_tensor * ggml_sgn_inplace(
2596
        struct ggml_context * ctx,
2597
0
        struct ggml_tensor  * a) {
2598
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SGN);
2599
0
}
2600
2601
// ggml_neg
2602
2603
struct ggml_tensor * ggml_neg(
2604
        struct ggml_context * ctx,
2605
0
        struct ggml_tensor  * a) {
2606
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_NEG);
2607
0
}
2608
2609
struct ggml_tensor * ggml_neg_inplace(
2610
        struct ggml_context * ctx,
2611
0
        struct ggml_tensor  * a) {
2612
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_NEG);
2613
0
}
2614
2615
// ggml_step
2616
2617
struct ggml_tensor * ggml_step(
2618
        struct ggml_context * ctx,
2619
0
        struct ggml_tensor  * a) {
2620
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_STEP);
2621
0
}
2622
2623
struct ggml_tensor * ggml_step_inplace(
2624
        struct ggml_context * ctx,
2625
0
        struct ggml_tensor  * a) {
2626
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_STEP);
2627
0
}
2628
2629
// ggml_tanh
2630
2631
struct ggml_tensor * ggml_tanh(
2632
        struct ggml_context * ctx,
2633
0
        struct ggml_tensor  * a) {
2634
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TANH);
2635
0
}
2636
2637
struct ggml_tensor * ggml_tanh_inplace(
2638
        struct ggml_context * ctx,
2639
0
        struct ggml_tensor  * a) {
2640
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TANH);
2641
0
}
2642
2643
// ggml_elu
2644
2645
struct ggml_tensor * ggml_elu(
2646
    struct ggml_context * ctx,
2647
0
    struct ggml_tensor  * a) {
2648
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ELU);
2649
0
}
2650
2651
struct ggml_tensor * ggml_elu_inplace(
2652
    struct ggml_context * ctx,
2653
0
    struct ggml_tensor  * a) {
2654
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ELU);
2655
0
}
2656
2657
// ggml_relu
2658
2659
struct ggml_tensor * ggml_relu(
2660
        struct ggml_context * ctx,
2661
0
        struct ggml_tensor  * a) {
2662
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_RELU);
2663
0
}
2664
2665
struct ggml_tensor * ggml_relu_inplace(
2666
        struct ggml_context * ctx,
2667
0
        struct ggml_tensor  * a) {
2668
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_RELU);
2669
0
}
2670
2671
// ggml_leaky_relu
2672
2673
struct ggml_tensor * ggml_leaky_relu(
2674
        struct ggml_context * ctx,
2675
        struct ggml_tensor  * a,
2676
        float                 negative_slope,
2677
0
        bool                  inplace) {
2678
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
2679
2680
0
    ggml_set_op_params(result, &negative_slope, sizeof(negative_slope));
2681
2682
0
    result->op     = GGML_OP_LEAKY_RELU;
2683
0
    result->src[0] = a;
2684
2685
0
    return result;
2686
0
}
2687
2688
// ggml_sigmoid
2689
2690
struct ggml_tensor * ggml_sigmoid(
2691
        struct ggml_context * ctx,
2692
0
        struct ggml_tensor  * a) {
2693
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SIGMOID);
2694
0
}
2695
2696
struct ggml_tensor * ggml_sigmoid_inplace(
2697
        struct ggml_context * ctx,
2698
0
        struct ggml_tensor  * a) {
2699
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SIGMOID);
2700
0
}
2701
2702
// ggml_gelu
2703
2704
struct ggml_tensor * ggml_gelu(
2705
        struct ggml_context * ctx,
2706
0
        struct ggml_tensor  * a) {
2707
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU);
2708
0
}
2709
2710
struct ggml_tensor * ggml_gelu_inplace(
2711
        struct ggml_context * ctx,
2712
0
        struct ggml_tensor  * a) {
2713
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU);
2714
0
}
2715
2716
// ggml_gelu_erf
2717
2718
struct ggml_tensor * ggml_gelu_erf(
2719
        struct ggml_context * ctx,
2720
0
        struct ggml_tensor  * a) {
2721
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_ERF);
2722
0
}
2723
2724
struct ggml_tensor * ggml_gelu_erf_inplace(
2725
        struct ggml_context * ctx,
2726
0
        struct ggml_tensor  * a) {
2727
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_ERF);
2728
0
}
2729
2730
// ggml_gelu_quick
2731
2732
struct ggml_tensor * ggml_gelu_quick(
2733
        struct ggml_context * ctx,
2734
0
        struct ggml_tensor  * a) {
2735
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2736
0
}
2737
2738
struct ggml_tensor * ggml_gelu_quick_inplace(
2739
        struct ggml_context * ctx,
2740
0
        struct ggml_tensor  * a) {
2741
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_GELU_QUICK);
2742
0
}
2743
2744
// ggml_silu
2745
2746
struct ggml_tensor * ggml_silu(
2747
        struct ggml_context * ctx,
2748
0
        struct ggml_tensor  * a) {
2749
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_SILU);
2750
0
}
2751
2752
struct ggml_tensor * ggml_silu_inplace(
2753
        struct ggml_context * ctx,
2754
0
        struct ggml_tensor  * a) {
2755
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_SILU);
2756
0
}
2757
2758
// ggml_xielu
2759
2760
struct ggml_tensor * ggml_xielu(
2761
        struct ggml_context * ctx,
2762
        struct ggml_tensor  * a,
2763
        float alpha_n,
2764
        float alpha_p,
2765
        float beta,
2766
0
        float eps) {
2767
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2768
2769
0
    ggml_set_op_params_i32(result, 0, (int32_t) GGML_UNARY_OP_XIELU);
2770
0
    ggml_set_op_params_f32(result, 1, beta + ggml_compute_softplus_f32(alpha_n));
2771
0
    ggml_set_op_params_f32(result, 2, ggml_compute_softplus_f32(alpha_p));
2772
0
    ggml_set_op_params_f32(result, 3, beta);
2773
0
    ggml_set_op_params_f32(result, 4, eps);
2774
2775
0
    result->op     = GGML_OP_UNARY;
2776
0
    result->src[0] = a;
2777
2778
0
    return result;
2779
0
}
2780
2781
// ggml_silu_back
2782
2783
struct ggml_tensor * ggml_silu_back(
2784
        struct ggml_context * ctx,
2785
        struct ggml_tensor  * a,
2786
0
        struct ggml_tensor  * b) {
2787
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
2788
2789
0
    result->op     = GGML_OP_SILU_BACK;
2790
0
    result->src[0] = a;
2791
0
    result->src[1] = b;
2792
2793
0
    return result;
2794
0
}
2795
2796
// ggml hardswish
2797
2798
struct ggml_tensor * ggml_hardswish(
2799
        struct ggml_context * ctx,
2800
0
        struct ggml_tensor  * a) {
2801
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSWISH);
2802
0
}
2803
2804
// ggml hardsigmoid
2805
2806
struct ggml_tensor * ggml_hardsigmoid(
2807
        struct ggml_context * ctx,
2808
0
        struct ggml_tensor  * a) {
2809
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_HARDSIGMOID);
2810
0
}
2811
2812
// ggml exp
2813
2814
struct ggml_tensor * ggml_exp(
2815
        struct ggml_context * ctx,
2816
0
        struct ggml_tensor  * a) {
2817
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_EXP);
2818
0
}
2819
2820
struct ggml_tensor * ggml_exp_inplace(
2821
        struct ggml_context * ctx,
2822
0
        struct ggml_tensor  * a) {
2823
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP);
2824
0
}
2825
2826
// ggml_glu
2827
2828
static struct ggml_tensor * ggml_glu_impl(
2829
        struct ggml_context * ctx,
2830
        struct ggml_tensor  * a,
2831
        struct ggml_tensor  * b,
2832
        enum ggml_glu_op      op,
2833
0
        bool                  swapped) {
2834
0
    GGML_ASSERT(ggml_is_contiguous_1(a));
2835
2836
0
    if (b) {
2837
0
        GGML_ASSERT(ggml_is_contiguous_1(b));
2838
0
        GGML_ASSERT(ggml_are_same_shape(a, b));
2839
0
        GGML_ASSERT(a->type == b->type);
2840
0
    }
2841
2842
0
    int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i];
2843
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? a->ne : ne, NULL, 0);
2844
2845
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
2846
0
    ggml_set_op_params_i32(result, 1, (int32_t) swapped);
2847
2848
0
    result->op     = GGML_OP_GLU;
2849
0
    result->src[0] = a;
2850
0
    result->src[1] = b;
2851
2852
0
    return result;
2853
0
}
2854
2855
// ggml_floor
2856
2857
struct ggml_tensor * ggml_floor(
2858
        struct ggml_context * ctx,
2859
0
        struct ggml_tensor  * a) {
2860
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_FLOOR);
2861
0
}
2862
2863
struct ggml_tensor * ggml_floor_inplace(
2864
        struct ggml_context * ctx,
2865
0
        struct ggml_tensor  * a) {
2866
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_FLOOR);
2867
0
}
2868
2869
// ggml_ceil
2870
2871
struct ggml_tensor * ggml_ceil(
2872
        struct ggml_context * ctx,
2873
0
        struct ggml_tensor  * a) {
2874
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_CEIL);
2875
0
}
2876
2877
struct ggml_tensor * ggml_ceil_inplace(
2878
        struct ggml_context * ctx,
2879
0
        struct ggml_tensor  * a) {
2880
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_CEIL);
2881
0
}
2882
2883
//ggml_round
2884
2885
struct ggml_tensor * ggml_round(
2886
        struct ggml_context * ctx,
2887
0
        struct ggml_tensor  * a) {
2888
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_ROUND);
2889
0
}
2890
2891
struct ggml_tensor * ggml_round_inplace(
2892
        struct ggml_context * ctx,
2893
0
        struct ggml_tensor  * a) {
2894
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_ROUND);
2895
0
}
2896
2897
//ggml_trunc
2898
2899
struct ggml_tensor * ggml_trunc(
2900
        struct ggml_context * ctx,
2901
0
        struct ggml_tensor  * a) {
2902
0
    return ggml_unary(ctx, a, GGML_UNARY_OP_TRUNC);
2903
0
}
2904
2905
struct ggml_tensor * ggml_trunc_inplace(
2906
        struct ggml_context * ctx,
2907
0
        struct ggml_tensor  * a) {
2908
0
    return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_TRUNC);
2909
0
}
2910
2911
struct ggml_tensor * ggml_glu(
2912
        struct ggml_context * ctx,
2913
        struct ggml_tensor  * a,
2914
        enum ggml_glu_op      op,
2915
0
        bool                  swapped) {
2916
0
    return ggml_glu_impl(ctx, a, NULL, op, swapped);
2917
0
}
2918
2919
struct ggml_tensor * ggml_glu_split(
2920
        struct ggml_context * ctx,
2921
        struct ggml_tensor  * a,
2922
        struct ggml_tensor  * b,
2923
0
        enum ggml_glu_op      op) {
2924
0
    return ggml_glu_impl(ctx, a, b, op, false);
2925
0
}
2926
2927
// ggml_reglu
2928
2929
struct ggml_tensor * ggml_reglu(
2930
        struct ggml_context * ctx,
2931
0
        struct ggml_tensor  * a) {
2932
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false);
2933
0
}
2934
2935
struct ggml_tensor * ggml_reglu_swapped(
2936
        struct ggml_context * ctx,
2937
0
        struct ggml_tensor  * a) {
2938
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true);
2939
0
}
2940
2941
struct ggml_tensor * ggml_reglu_split(
2942
        struct ggml_context * ctx,
2943
        struct ggml_tensor  * a,
2944
0
        struct ggml_tensor  * b) {
2945
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false);
2946
0
}
2947
2948
// ggml_geglu
2949
2950
struct ggml_tensor * ggml_geglu(
2951
        struct ggml_context * ctx,
2952
0
        struct ggml_tensor  * a) {
2953
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false);
2954
0
}
2955
2956
struct ggml_tensor * ggml_geglu_swapped(
2957
        struct ggml_context * ctx,
2958
0
        struct ggml_tensor  * a) {
2959
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true);
2960
0
}
2961
2962
struct ggml_tensor * ggml_geglu_split(
2963
        struct ggml_context * ctx,
2964
        struct ggml_tensor  * a,
2965
0
        struct ggml_tensor  * b) {
2966
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false);
2967
0
}
2968
2969
// ggml_swiglu
2970
2971
struct ggml_tensor * ggml_swiglu(
2972
        struct ggml_context * ctx,
2973
0
        struct ggml_tensor  * a) {
2974
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false);
2975
0
}
2976
2977
struct ggml_tensor * ggml_swiglu_swapped(
2978
        struct ggml_context * ctx,
2979
0
        struct ggml_tensor  * a) {
2980
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, true);
2981
0
}
2982
2983
struct ggml_tensor * ggml_swiglu_split(
2984
        struct ggml_context * ctx,
2985
        struct ggml_tensor  * a,
2986
0
        struct ggml_tensor  * b) {
2987
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false);
2988
0
}
2989
2990
// ggml_geglu_erf
2991
2992
struct ggml_tensor * ggml_geglu_erf(
2993
        struct ggml_context * ctx,
2994
0
        struct ggml_tensor  * a) {
2995
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false);
2996
0
}
2997
2998
struct ggml_tensor * ggml_geglu_erf_swapped(
2999
        struct ggml_context * ctx,
3000
0
        struct ggml_tensor  * a) {
3001
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true);
3002
0
}
3003
3004
struct ggml_tensor * ggml_geglu_erf_split(
3005
        struct ggml_context * ctx,
3006
        struct ggml_tensor  * a,
3007
0
        struct ggml_tensor  * b) {
3008
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false);
3009
0
}
3010
3011
// ggml_geglu_quick
3012
3013
struct ggml_tensor * ggml_geglu_quick(
3014
        struct ggml_context * ctx,
3015
0
        struct ggml_tensor  * a) {
3016
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false);
3017
0
}
3018
3019
struct ggml_tensor * ggml_geglu_quick_swapped(
3020
        struct ggml_context * ctx,
3021
0
        struct ggml_tensor  * a) {
3022
0
    return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true);
3023
0
}
3024
3025
struct ggml_tensor * ggml_geglu_quick_split(
3026
        struct ggml_context * ctx,
3027
        struct ggml_tensor  * a,
3028
0
        struct ggml_tensor  * b) {
3029
0
    return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false);
3030
0
}
3031
3032
struct ggml_tensor * ggml_swiglu_oai(
3033
        struct ggml_context * ctx,
3034
        struct ggml_tensor  * a,
3035
        struct ggml_tensor  * b,
3036
        float                 alpha,
3037
0
        float                 limit) {
3038
0
    struct ggml_tensor * result = ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU_OAI, false);
3039
0
    ggml_set_op_params_f32(result, 2, alpha);
3040
0
    ggml_set_op_params_f32(result, 3, limit);
3041
3042
0
    return result;
3043
0
}
3044
3045
// ggml_norm
3046
3047
static struct ggml_tensor * ggml_norm_impl(
3048
        struct ggml_context * ctx,
3049
        struct ggml_tensor  * a,
3050
        float                 eps,
3051
0
        bool                  inplace) {
3052
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3053
3054
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3055
3056
0
    result->op     = GGML_OP_NORM;
3057
0
    result->src[0] = a;
3058
3059
0
    return result;
3060
0
}
3061
3062
struct ggml_tensor * ggml_norm(
3063
        struct ggml_context * ctx,
3064
        struct ggml_tensor  * a,
3065
0
        float                 eps) {
3066
0
    return ggml_norm_impl(ctx, a, eps, false);
3067
0
}
3068
3069
struct ggml_tensor * ggml_norm_inplace(
3070
        struct ggml_context * ctx,
3071
        struct ggml_tensor  * a,
3072
0
        float                 eps) {
3073
0
    return ggml_norm_impl(ctx, a, eps, true);
3074
0
}
3075
3076
// ggml_rms_norm
3077
3078
static struct ggml_tensor * ggml_rms_norm_impl(
3079
        struct ggml_context * ctx,
3080
        struct ggml_tensor  * a,
3081
        float                 eps,
3082
0
        bool                  inplace) {
3083
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3084
3085
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3086
3087
0
    result->op     = GGML_OP_RMS_NORM;
3088
0
    result->src[0] = a;
3089
3090
0
    return result;
3091
0
}
3092
3093
struct ggml_tensor * ggml_rms_norm(
3094
        struct ggml_context * ctx,
3095
        struct ggml_tensor  * a,
3096
0
        float                 eps) {
3097
0
    return ggml_rms_norm_impl(ctx, a, eps, false);
3098
0
}
3099
3100
struct ggml_tensor * ggml_rms_norm_inplace(
3101
        struct ggml_context * ctx,
3102
        struct ggml_tensor  * a,
3103
0
        float                 eps) {
3104
0
    return ggml_rms_norm_impl(ctx, a, eps, true);
3105
0
}
3106
3107
// ggml_rms_norm_back
3108
3109
struct ggml_tensor * ggml_rms_norm_back(
3110
        struct ggml_context * ctx,
3111
        struct ggml_tensor  * a,
3112
        struct ggml_tensor  * b,
3113
0
        float                 eps) {
3114
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3115
3116
0
    ggml_set_op_params(result, &eps, sizeof(eps));
3117
3118
0
    result->op     = GGML_OP_RMS_NORM_BACK;
3119
0
    result->src[0] = a;
3120
0
    result->src[1] = b;
3121
3122
0
    return result;
3123
0
}
3124
3125
// ggml_group_norm
3126
3127
static struct ggml_tensor * ggml_group_norm_impl(
3128
        struct ggml_context * ctx,
3129
        struct ggml_tensor  * a,
3130
        int                   n_groups,
3131
        float                 eps,
3132
0
        bool                  inplace) {
3133
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3134
3135
0
    ggml_set_op_params_i32(result, 0, n_groups);
3136
0
    ggml_set_op_params_f32(result, 1, eps);
3137
3138
0
    result->op     = GGML_OP_GROUP_NORM;
3139
0
    result->src[0] = a;
3140
3141
0
    return result;
3142
0
}
3143
3144
struct ggml_tensor * ggml_group_norm(
3145
        struct ggml_context * ctx,
3146
        struct ggml_tensor  * a,
3147
        int                   n_groups,
3148
0
        float                 eps) {
3149
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, false);
3150
0
}
3151
3152
struct ggml_tensor * ggml_group_norm_inplace(
3153
        struct ggml_context * ctx,
3154
        struct ggml_tensor  * a,
3155
        int                   n_groups,
3156
0
        float                 eps) {
3157
0
    return ggml_group_norm_impl(ctx, a, n_groups, eps, true);
3158
0
}
3159
3160
// ggml_l2_norm
3161
3162
static struct ggml_tensor * ggml_l2_norm_impl(
3163
        struct ggml_context * ctx,
3164
        struct ggml_tensor  * a,
3165
        float                 eps,
3166
0
        bool                  inplace) {
3167
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3168
3169
0
    ggml_set_op_params_f32(result, 0, eps);
3170
3171
0
    result->op     = GGML_OP_L2_NORM;
3172
0
    result->src[0] = a;
3173
3174
0
    return result;
3175
0
}
3176
3177
struct ggml_tensor * ggml_l2_norm(
3178
        struct ggml_context * ctx,
3179
        struct ggml_tensor  * a,
3180
0
        float                 eps) {
3181
0
    return ggml_l2_norm_impl(ctx, a, eps, false);
3182
0
}
3183
3184
struct ggml_tensor * ggml_l2_norm_inplace(
3185
        struct ggml_context * ctx,
3186
        struct ggml_tensor  * a,
3187
0
        float                 eps) {
3188
0
    return ggml_l2_norm_impl(ctx, a, eps, true);
3189
0
}
3190
3191
// ggml_mul_mat
3192
3193
0
static inline bool ggml_can_mul_mat(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3194
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3195
3196
0
    return (t0->ne[0]           == t1->ne[0])  &&
3197
0
           (t1->ne[2]%t0->ne[2] == 0)          && // verify t0 is broadcastable
3198
0
           (t1->ne[3]%t0->ne[3] == 0);
3199
0
}
3200
3201
struct ggml_tensor * ggml_mul_mat(
3202
        struct ggml_context * ctx,
3203
        struct ggml_tensor  * a,
3204
0
        struct ggml_tensor  * b) {
3205
0
    GGML_ASSERT(ggml_can_mul_mat(a, b));
3206
0
    GGML_ASSERT(!ggml_is_transposed(a));
3207
3208
0
    const int64_t ne[4] = { a->ne[1], b->ne[1], b->ne[2], b->ne[3] };
3209
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3210
3211
0
    result->op     = GGML_OP_MUL_MAT;
3212
0
    result->src[0] = a;
3213
0
    result->src[1] = b;
3214
3215
0
    return result;
3216
0
}
3217
3218
void ggml_mul_mat_set_prec(
3219
        struct ggml_tensor * a,
3220
0
        enum ggml_prec       prec) {
3221
0
    GGML_ASSERT(a->op == GGML_OP_MUL_MAT);
3222
3223
0
    const int32_t prec_i32 = (int32_t) prec;
3224
3225
0
    ggml_set_op_params_i32(a, 0, prec_i32);
3226
0
}
3227
3228
// ggml_mul_mat_id
3229
3230
/*
3231
    c = ggml_mul_mat_id(ctx, as, b, ids);
3232
3233
    as  -> [cols, rows, n_expert]
3234
    b   -> [cols, n_expert_used, n_tokens]
3235
    ids -> [n_expert_used, n_tokens] (i32)
3236
    c   -> [rows, n_expert_used, n_tokens]
3237
3238
    in b, n_expert_used can be broadcasted to match the n_expert_used of ids
3239
3240
    c ~= as[:,:,i] @ b[:,i%r,t], i = ids[e,t] for all e,t in ids
3241
*/
3242
struct ggml_tensor * ggml_mul_mat_id(
3243
        struct ggml_context * ctx,
3244
        struct ggml_tensor  * as,
3245
        struct ggml_tensor  * b,
3246
0
        struct ggml_tensor  * ids) {
3247
0
    GGML_ASSERT(!ggml_is_transposed(as));
3248
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
3249
3250
0
    GGML_ASSERT(as->ne[3] == 1); // as is 3d (one matrix per expert)
3251
0
    GGML_ASSERT(b->ne[3] == 1); // b is 3d
3252
0
    GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1); // ids is 2d
3253
0
    GGML_ASSERT(ids->ne[1] == b->ne[2]); // must have an expert list per b row
3254
0
    GGML_ASSERT(as->ne[0] == b->ne[0]); // can_mul_mat
3255
0
    GGML_ASSERT(ids->ne[0] % b->ne[1] == 0); // can broadcast
3256
3257
0
    const int64_t ne[4] = { as->ne[1], ids->ne[0], b->ne[2], 1 };
3258
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3259
3260
0
    result->op     = GGML_OP_MUL_MAT_ID;
3261
0
    result->src[0] = as;
3262
0
    result->src[1] = b;
3263
0
    result->src[2] = ids;
3264
3265
0
    return result;
3266
0
}
3267
3268
// ggml_out_prod
3269
3270
0
static inline bool ggml_can_out_prod(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
3271
0
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
3272
3273
0
    return (t0->ne[1] == t1->ne[1])   &&
3274
0
           (t1->ne[2]%t0->ne[2] == 0) && // verify t0 is broadcastable
3275
0
           (t1->ne[3]%t0->ne[3] == 0);
3276
0
}
3277
3278
struct ggml_tensor * ggml_out_prod(
3279
        struct ggml_context * ctx,
3280
        struct ggml_tensor  * a,
3281
0
        struct ggml_tensor  * b) {
3282
0
    GGML_ASSERT(ggml_can_out_prod(a, b));
3283
0
    GGML_ASSERT(!ggml_is_transposed(a));
3284
3285
    // a is broadcastable to b for ne[2] and ne[3] -> use b->ne[2] and b->ne[3]
3286
0
    const int64_t ne[4] = { a->ne[0], b->ne[0], b->ne[2], b->ne[3] };
3287
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
3288
3289
0
    result->op     = GGML_OP_OUT_PROD;
3290
0
    result->src[0] = a;
3291
0
    result->src[1] = b;
3292
3293
0
    return result;
3294
0
}
3295
3296
// ggml_scale
3297
3298
static struct ggml_tensor * ggml_scale_impl(
3299
        struct ggml_context * ctx,
3300
        struct ggml_tensor  * a,
3301
        float                 s,
3302
        float                 b,
3303
0
        bool                  inplace) {
3304
0
    GGML_ASSERT(ggml_is_padded_1d(a));
3305
3306
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3307
3308
0
    float params[2] = { s, b };
3309
0
    ggml_set_op_params(result, &params, sizeof(params));
3310
3311
0
    result->op     = GGML_OP_SCALE;
3312
0
    result->src[0] = a;
3313
3314
0
    return result;
3315
0
}
3316
3317
struct ggml_tensor * ggml_scale(
3318
        struct ggml_context * ctx,
3319
        struct ggml_tensor  * a,
3320
0
        float                 s) {
3321
0
    return ggml_scale_impl(ctx, a, s, 0.0, false);
3322
0
}
3323
3324
struct ggml_tensor * ggml_scale_inplace(
3325
        struct ggml_context * ctx,
3326
        struct ggml_tensor  * a,
3327
0
        float                 s) {
3328
0
    return ggml_scale_impl(ctx, a, s, 0.0, true);
3329
0
}
3330
3331
struct ggml_tensor * ggml_scale_bias(
3332
        struct ggml_context * ctx,
3333
        struct ggml_tensor  * a,
3334
        float                 s,
3335
0
        float                 b) {
3336
0
    return ggml_scale_impl(ctx, a, s, b, false);
3337
0
}
3338
3339
struct ggml_tensor * ggml_scale_bias_inplace(
3340
        struct ggml_context * ctx,
3341
        struct ggml_tensor  * a,
3342
        float                 s,
3343
0
        float                 b) {
3344
0
    return ggml_scale_impl(ctx, a, s, b, true);
3345
0
}
3346
3347
// ggml_set
3348
3349
static struct ggml_tensor * ggml_set_impl(
3350
        struct ggml_context * ctx,
3351
        struct ggml_tensor  * a,
3352
        struct ggml_tensor  * b,
3353
        size_t                nb1,
3354
        size_t                nb2,
3355
        size_t                nb3,
3356
        size_t                offset,
3357
0
        bool                  inplace) {
3358
0
    GGML_ASSERT(ggml_nelements(a) >= ggml_nelements(b));
3359
3360
    // make a view of the destination
3361
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3362
3363
0
    GGML_ASSERT(offset < (size_t)(1 << 30));
3364
0
    int32_t params[] = { nb1, nb2, nb3, offset, inplace ? 1 : 0 };
3365
0
    ggml_set_op_params(result, params, sizeof(params));
3366
3367
0
    result->op     = GGML_OP_SET;
3368
0
    result->src[0] = a;
3369
0
    result->src[1] = b;
3370
3371
0
    return result;
3372
0
}
3373
3374
struct ggml_tensor * ggml_set(
3375
        struct ggml_context * ctx,
3376
        struct ggml_tensor  * a,
3377
        struct ggml_tensor  * b,
3378
        size_t                nb1,
3379
        size_t                nb2,
3380
        size_t                nb3,
3381
0
        size_t                offset) {
3382
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, false);
3383
0
}
3384
3385
struct ggml_tensor * ggml_set_inplace(
3386
        struct ggml_context * ctx,
3387
        struct ggml_tensor  * a,
3388
        struct ggml_tensor  * b,
3389
        size_t                nb1,
3390
        size_t                nb2,
3391
        size_t                nb3,
3392
0
        size_t                offset) {
3393
0
    return ggml_set_impl(ctx, a, b, nb1, nb2, nb3, offset, true);
3394
0
}
3395
3396
struct ggml_tensor * ggml_set_1d(
3397
        struct ggml_context * ctx,
3398
        struct ggml_tensor  * a,
3399
        struct ggml_tensor  * b,
3400
0
        size_t                offset) {
3401
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, false);
3402
0
}
3403
3404
struct ggml_tensor * ggml_set_1d_inplace(
3405
        struct ggml_context * ctx,
3406
        struct ggml_tensor  * a,
3407
        struct ggml_tensor  * b,
3408
0
        size_t                offset) {
3409
0
    return ggml_set_impl(ctx, a, b, a->nb[1], a->nb[2], a->nb[3], offset, true);
3410
0
}
3411
3412
struct ggml_tensor * ggml_set_2d(
3413
        struct ggml_context * ctx,
3414
        struct ggml_tensor  * a,
3415
        struct ggml_tensor  * b,
3416
        size_t                nb1,
3417
0
        size_t                offset) {
3418
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, false);
3419
0
}
3420
3421
struct ggml_tensor * ggml_set_2d_inplace(
3422
        struct ggml_context * ctx,
3423
        struct ggml_tensor  * a,
3424
        struct ggml_tensor  * b,
3425
        size_t                nb1,
3426
0
        size_t                offset) {
3427
0
    return ggml_set_impl(ctx, a, b, nb1, a->nb[2], a->nb[3], offset, true);
3428
0
}
3429
3430
// ggml_cpy
3431
3432
static struct ggml_tensor * ggml_cpy_impl(
3433
        struct ggml_context * ctx,
3434
        struct ggml_tensor  * a,
3435
0
        struct ggml_tensor  * b) {
3436
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3437
3438
    // make a view of the destination
3439
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, b);
3440
0
    if (strlen(b->name) > 0) {
3441
0
        ggml_format_name(result, "%s (copy of %s)", b->name, a->name);
3442
0
    } else {
3443
0
        ggml_format_name(result, "%s (copy)", a->name);
3444
0
    }
3445
3446
0
    result->op     = GGML_OP_CPY;
3447
0
    result->src[0] = a;
3448
0
    result->src[1] = b;
3449
3450
0
    return result;
3451
0
}
3452
3453
struct ggml_tensor * ggml_cpy(
3454
        struct ggml_context * ctx,
3455
        struct ggml_tensor * a,
3456
0
        struct ggml_tensor * b) {
3457
0
    return ggml_cpy_impl(ctx, a, b);
3458
0
}
3459
3460
struct ggml_tensor * ggml_cast(
3461
        struct ggml_context * ctx,
3462
        struct ggml_tensor  * a,
3463
0
        enum   ggml_type      type) {
3464
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, type, GGML_MAX_DIMS, a->ne);
3465
0
    ggml_format_name(result, "%s (copy)", a->name);
3466
3467
0
    result->op     = GGML_OP_CPY;
3468
0
    result->src[0] = a;
3469
0
    result->src[1] = result; // note: this self-reference might seem redundant, but it's actually needed by some
3470
                             //       backends for consistency with ggml_cpy_impl() above
3471
3472
0
    return result;
3473
0
}
3474
3475
// ggml_cont
3476
3477
static struct ggml_tensor * ggml_cont_impl(
3478
        struct ggml_context * ctx,
3479
0
        struct ggml_tensor  * a) {
3480
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
3481
0
    ggml_format_name(result, "%s (cont)", a->name);
3482
3483
0
    result->op     = GGML_OP_CONT;
3484
0
    result->src[0] = a;
3485
3486
0
    return result;
3487
0
}
3488
3489
struct ggml_tensor * ggml_cont(
3490
        struct ggml_context * ctx,
3491
0
        struct ggml_tensor * a) {
3492
0
    return ggml_cont_impl(ctx, a);
3493
0
}
3494
3495
// make contiguous, with new shape
3496
GGML_API struct ggml_tensor * ggml_cont_1d(
3497
        struct ggml_context * ctx,
3498
        struct ggml_tensor  * a,
3499
0
        int64_t               ne0) {
3500
0
    return ggml_cont_4d(ctx, a, ne0, 1, 1, 1);
3501
0
}
3502
3503
GGML_API struct ggml_tensor * ggml_cont_2d(
3504
        struct ggml_context * ctx,
3505
        struct ggml_tensor  * a,
3506
        int64_t               ne0,
3507
0
        int64_t               ne1) {
3508
0
    return ggml_cont_4d(ctx, a, ne0, ne1, 1, 1);
3509
0
}
3510
3511
GGML_API struct ggml_tensor * ggml_cont_3d(
3512
        struct ggml_context * ctx,
3513
        struct ggml_tensor  * a,
3514
        int64_t               ne0,
3515
        int64_t               ne1,
3516
0
        int64_t               ne2) {
3517
0
    return ggml_cont_4d(ctx, a, ne0, ne1, ne2, 1);
3518
0
}
3519
3520
struct ggml_tensor * ggml_cont_4d(
3521
        struct ggml_context * ctx,
3522
        struct ggml_tensor  * a,
3523
        int64_t               ne0,
3524
        int64_t               ne1,
3525
        int64_t               ne2,
3526
0
        int64_t               ne3) {
3527
0
    GGML_ASSERT(ggml_nelements(a) == (ne0*ne1*ne2*ne3));
3528
3529
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
3530
0
    ggml_format_name(result, "%s (cont)", a->name);
3531
3532
0
    result->op     = GGML_OP_CONT;
3533
0
    result->src[0] = a;
3534
3535
0
    return result;
3536
0
}
3537
3538
// ggml_reshape
3539
3540
struct ggml_tensor * ggml_reshape(
3541
        struct ggml_context * ctx,
3542
        struct ggml_tensor * a,
3543
0
        struct ggml_tensor * b) {
3544
0
    GGML_ASSERT(ggml_is_contiguous(a));
3545
    // as only the shape of b is relevant, and not its memory layout, b is allowed to be non contiguous.
3546
0
    GGML_ASSERT(ggml_nelements(a) == ggml_nelements(b));
3547
3548
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b->ne, a, 0);
3549
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3550
3551
0
    result->op     = GGML_OP_RESHAPE;
3552
0
    result->src[0] = a;
3553
3554
0
    return result;
3555
0
}
3556
3557
struct ggml_tensor * ggml_reshape_1d(
3558
        struct ggml_context * ctx,
3559
        struct ggml_tensor  * a,
3560
0
        int64_t               ne0) {
3561
0
    GGML_ASSERT(ggml_is_contiguous(a));
3562
0
    GGML_ASSERT(ggml_nelements(a) == ne0);
3563
3564
0
    const int64_t ne[1] = { ne0 };
3565
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
3566
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3567
3568
0
    result->op     = GGML_OP_RESHAPE;
3569
0
    result->src[0] = a;
3570
3571
0
    return result;
3572
0
}
3573
3574
struct ggml_tensor * ggml_reshape_2d(
3575
        struct ggml_context * ctx,
3576
        struct ggml_tensor  * a,
3577
        int64_t               ne0,
3578
0
        int64_t               ne1) {
3579
0
    GGML_ASSERT(ggml_is_contiguous(a));
3580
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1);
3581
3582
0
    const int64_t ne[2] = { ne0, ne1 };
3583
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
3584
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3585
3586
0
    result->op     = GGML_OP_RESHAPE;
3587
0
    result->src[0] = a;
3588
3589
0
    return result;
3590
0
}
3591
3592
struct ggml_tensor * ggml_reshape_3d(
3593
        struct ggml_context * ctx,
3594
        struct ggml_tensor  * a,
3595
        int64_t               ne0,
3596
        int64_t               ne1,
3597
0
        int64_t               ne2) {
3598
0
    GGML_ASSERT(ggml_is_contiguous(a));
3599
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2);
3600
3601
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3602
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
3603
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3604
3605
0
    result->op     = GGML_OP_RESHAPE;
3606
0
    result->src[0] = a;
3607
3608
0
    return result;
3609
0
}
3610
3611
struct ggml_tensor * ggml_reshape_4d(
3612
        struct ggml_context * ctx,
3613
        struct ggml_tensor  * a,
3614
        int64_t               ne0,
3615
        int64_t               ne1,
3616
        int64_t               ne2,
3617
0
        int64_t               ne3) {
3618
0
    GGML_ASSERT(ggml_is_contiguous(a));
3619
0
    GGML_ASSERT(ggml_nelements(a) == ne0*ne1*ne2*ne3);
3620
3621
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3622
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
3623
0
    ggml_format_name(result, "%s (reshaped)", a->name);
3624
3625
0
    result->op     = GGML_OP_RESHAPE;
3626
0
    result->src[0] = a;
3627
3628
0
    return result;
3629
0
}
3630
3631
static struct ggml_tensor * ggml_view_impl(
3632
        struct ggml_context * ctx,
3633
        struct ggml_tensor  * a,
3634
        int                   n_dims,
3635
        const int64_t       * ne,
3636
0
        size_t                offset) {
3637
0
    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset);
3638
0
    ggml_format_name(result, "%s (view)", a->name);
3639
3640
0
    ggml_set_op_params(result, &offset, sizeof(offset));
3641
3642
0
    result->op     = GGML_OP_VIEW;
3643
0
    result->src[0] = a;
3644
3645
0
    return result;
3646
0
}
3647
3648
// ggml_view_1d
3649
3650
struct ggml_tensor * ggml_view_1d(
3651
        struct ggml_context * ctx,
3652
        struct ggml_tensor  * a,
3653
        int64_t               ne0,
3654
0
        size_t                offset) {
3655
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset);
3656
3657
0
    return result;
3658
0
}
3659
3660
// ggml_view_2d
3661
3662
struct ggml_tensor * ggml_view_2d(
3663
        struct ggml_context * ctx,
3664
        struct ggml_tensor  * a,
3665
        int64_t               ne0,
3666
        int64_t               ne1,
3667
        size_t                nb1,
3668
0
        size_t                offset) {
3669
0
    const int64_t ne[2] = { ne0, ne1 };
3670
3671
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
3672
3673
0
    result->nb[1] = nb1;
3674
0
    result->nb[2] = result->nb[1]*ne1;
3675
0
    result->nb[3] = result->nb[2];
3676
3677
0
    return result;
3678
0
}
3679
3680
// ggml_view_3d
3681
3682
struct ggml_tensor * ggml_view_3d(
3683
        struct ggml_context * ctx,
3684
        struct ggml_tensor  * a,
3685
        int64_t               ne0,
3686
        int64_t               ne1,
3687
        int64_t               ne2,
3688
        size_t                nb1,
3689
        size_t                nb2,
3690
0
        size_t                offset) {
3691
0
    const int64_t ne[3] = { ne0, ne1, ne2 };
3692
3693
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
3694
3695
0
    result->nb[1] = nb1;
3696
0
    result->nb[2] = nb2;
3697
0
    result->nb[3] = result->nb[2]*ne2;
3698
3699
0
    return result;
3700
0
}
3701
3702
// ggml_view_4d
3703
3704
struct ggml_tensor * ggml_view_4d(
3705
        struct ggml_context * ctx,
3706
        struct ggml_tensor  * a,
3707
        int64_t               ne0,
3708
        int64_t               ne1,
3709
        int64_t               ne2,
3710
        int64_t               ne3,
3711
        size_t                nb1,
3712
        size_t                nb2,
3713
        size_t                nb3,
3714
0
        size_t                offset) {
3715
0
    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
3716
3717
0
    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
3718
3719
0
    result->nb[1] = nb1;
3720
0
    result->nb[2] = nb2;
3721
0
    result->nb[3] = nb3;
3722
3723
0
    return result;
3724
0
}
3725
3726
// ggml_permute
3727
3728
struct ggml_tensor * ggml_permute(
3729
        struct ggml_context * ctx,
3730
        struct ggml_tensor  * a,
3731
        int                   axis0,
3732
        int                   axis1,
3733
        int                   axis2,
3734
0
        int                   axis3) {
3735
0
    GGML_ASSERT(axis0 >= 0 && axis0 < GGML_MAX_DIMS);
3736
0
    GGML_ASSERT(axis1 >= 0 && axis1 < GGML_MAX_DIMS);
3737
0
    GGML_ASSERT(axis2 >= 0 && axis2 < GGML_MAX_DIMS);
3738
0
    GGML_ASSERT(axis3 >= 0 && axis3 < GGML_MAX_DIMS);
3739
3740
0
    GGML_ASSERT(axis0 != axis1);
3741
0
    GGML_ASSERT(axis0 != axis2);
3742
0
    GGML_ASSERT(axis0 != axis3);
3743
0
    GGML_ASSERT(axis1 != axis2);
3744
0
    GGML_ASSERT(axis1 != axis3);
3745
0
    GGML_ASSERT(axis2 != axis3);
3746
3747
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3748
0
    ggml_format_name(result, "%s (permuted)", a->name);
3749
3750
0
    int ne[GGML_MAX_DIMS];
3751
0
    int nb[GGML_MAX_DIMS];
3752
3753
0
    ne[axis0] = a->ne[0];
3754
0
    ne[axis1] = a->ne[1];
3755
0
    ne[axis2] = a->ne[2];
3756
0
    ne[axis3] = a->ne[3];
3757
3758
0
    nb[axis0] = a->nb[0];
3759
0
    nb[axis1] = a->nb[1];
3760
0
    nb[axis2] = a->nb[2];
3761
0
    nb[axis3] = a->nb[3];
3762
3763
0
    result->ne[0] = ne[0];
3764
0
    result->ne[1] = ne[1];
3765
0
    result->ne[2] = ne[2];
3766
0
    result->ne[3] = ne[3];
3767
3768
0
    result->nb[0] = nb[0];
3769
0
    result->nb[1] = nb[1];
3770
0
    result->nb[2] = nb[2];
3771
0
    result->nb[3] = nb[3];
3772
3773
0
    result->op     = GGML_OP_PERMUTE;
3774
0
    result->src[0] = a;
3775
3776
0
    int32_t params[] = { axis0, axis1, axis2, axis3 };
3777
0
    ggml_set_op_params(result, params, sizeof(params));
3778
3779
0
    return result;
3780
0
}
3781
3782
// ggml_transpose
3783
3784
struct ggml_tensor * ggml_transpose(
3785
        struct ggml_context * ctx,
3786
0
        struct ggml_tensor  * a) {
3787
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3788
0
    ggml_format_name(result, "%s (transposed)", a->name);
3789
3790
0
    result->ne[0] = a->ne[1];
3791
0
    result->ne[1] = a->ne[0];
3792
3793
0
    result->nb[0] = a->nb[1];
3794
0
    result->nb[1] = a->nb[0];
3795
3796
0
    result->op     = GGML_OP_TRANSPOSE;
3797
0
    result->src[0] = a;
3798
3799
0
    return result;
3800
0
}
3801
3802
// ggml_get_rows
3803
3804
struct ggml_tensor * ggml_get_rows(
3805
        struct ggml_context * ctx,
3806
        struct ggml_tensor  * a,
3807
0
        struct ggml_tensor  * b) {
3808
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
3809
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
3810
0
    GGML_ASSERT(b->ne[3] == 1);
3811
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
3812
3813
    // TODO: implement non F32 return
3814
0
    enum ggml_type type = GGML_TYPE_F32;
3815
0
    if (a->type == GGML_TYPE_I32) {
3816
0
        type = a->type;
3817
0
    }
3818
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, a->ne[0], b->ne[0], b->ne[1], b->ne[2]);
3819
3820
0
    result->op     = GGML_OP_GET_ROWS;
3821
0
    result->src[0] = a;
3822
0
    result->src[1] = b;
3823
3824
0
    return result;
3825
0
}
3826
3827
// ggml_get_rows_back
3828
3829
struct ggml_tensor * ggml_get_rows_back(
3830
        struct ggml_context * ctx,
3831
        struct ggml_tensor  * a,
3832
        struct ggml_tensor  * b,
3833
0
        struct ggml_tensor  * c) {
3834
0
    GGML_ASSERT(ggml_is_matrix(a) && ggml_is_vector(b) && b->type == GGML_TYPE_I32);
3835
0
    GGML_ASSERT(ggml_is_matrix(c) && (a->ne[0] == c->ne[0]));
3836
3837
    // TODO: implement non F32 return
3838
    //struct ggml_tensor * result = ggml_new_tensor_2d(ctx, a->type, a->ne[0], b->ne[0]);
3839
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, c->ne[0], c->ne[1]);
3840
3841
0
    result->op     = GGML_OP_GET_ROWS_BACK;
3842
0
    result->src[0] = a;
3843
0
    result->src[1] = b;
3844
3845
0
    return result;
3846
0
}
3847
3848
// ggml_set_rows
3849
3850
struct ggml_tensor * ggml_set_rows(
3851
        struct ggml_context * ctx,
3852
        struct ggml_tensor  * a,
3853
        struct ggml_tensor  * b,
3854
0
        struct ggml_tensor  * c) {
3855
0
    GGML_ASSERT(a->ne[0] == b->ne[0]);
3856
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
3857
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
3858
0
    GGML_ASSERT(b->ne[1] == c->ne[0]);
3859
0
    GGML_ASSERT(b->ne[2] % c->ne[1] == 0);
3860
0
    GGML_ASSERT(b->ne[3] % c->ne[2] == 0);
3861
0
    GGML_ASSERT(c->ne[3] == 1);
3862
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
3863
0
    GGML_ASSERT(c->type == GGML_TYPE_I64 || c->type == GGML_TYPE_I32);
3864
3865
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
3866
0
    GGML_ASSERT(ggml_is_contiguous_rows(b));
3867
3868
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
3869
3870
0
    result->op     = GGML_OP_SET_ROWS;
3871
0
    result->src[0] = b;
3872
0
    result->src[1] = c;
3873
0
    result->src[2] = a; // note: order is weird due to legacy reasons (https://github.com/ggml-org/llama.cpp/pull/16063#discussion_r2385795931)
3874
3875
0
    return result;
3876
0
}
3877
3878
// ggml_diag
3879
3880
struct ggml_tensor * ggml_diag(
3881
        struct ggml_context * ctx,
3882
0
        struct ggml_tensor  * a) {
3883
0
    GGML_ASSERT(a->ne[1] == 1);
3884
3885
0
    const int64_t ne[4] = { a->ne[0], a->ne[0], a->ne[2], a->ne[3] };
3886
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, a->type, 4, ne);
3887
3888
0
    result->op     = GGML_OP_DIAG;
3889
0
    result->src[0] = a;
3890
3891
0
    return result;
3892
0
}
3893
3894
// ggml_diag_mask_inf
3895
3896
static struct ggml_tensor * ggml_diag_mask_inf_impl(
3897
        struct ggml_context * ctx,
3898
        struct ggml_tensor  * a,
3899
        int                   n_past,
3900
0
        bool                  inplace) {
3901
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3902
3903
0
    int32_t params[] = { n_past };
3904
0
    ggml_set_op_params(result, params, sizeof(params));
3905
3906
0
    result->op     = GGML_OP_DIAG_MASK_INF;
3907
0
    result->src[0] = a;
3908
3909
0
    return result;
3910
0
}
3911
3912
struct ggml_tensor * ggml_diag_mask_inf(
3913
        struct ggml_context * ctx,
3914
        struct ggml_tensor  * a,
3915
0
        int                   n_past) {
3916
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
3917
0
}
3918
3919
struct ggml_tensor * ggml_diag_mask_inf_inplace(
3920
        struct ggml_context * ctx,
3921
        struct ggml_tensor  * a,
3922
0
        int                   n_past) {
3923
0
    return ggml_diag_mask_inf_impl(ctx, a, n_past, true);
3924
0
}
3925
3926
// ggml_diag_mask_zero
3927
3928
static struct ggml_tensor * ggml_diag_mask_zero_impl(
3929
        struct ggml_context * ctx,
3930
        struct ggml_tensor  * a,
3931
        int                   n_past,
3932
0
        bool                  inplace) {
3933
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3934
3935
0
    int32_t params[] = { n_past };
3936
0
    ggml_set_op_params(result, params, sizeof(params));
3937
3938
0
    result->op     = GGML_OP_DIAG_MASK_ZERO;
3939
0
    result->src[0] = a;
3940
3941
0
    return result;
3942
0
}
3943
3944
struct ggml_tensor * ggml_diag_mask_zero(
3945
        struct ggml_context * ctx,
3946
        struct ggml_tensor  * a,
3947
0
        int                   n_past) {
3948
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, false);
3949
0
}
3950
3951
struct ggml_tensor * ggml_diag_mask_zero_inplace(
3952
        struct ggml_context * ctx,
3953
        struct ggml_tensor  * a,
3954
0
        int                   n_past) {
3955
0
    return ggml_diag_mask_zero_impl(ctx, a, n_past, true);
3956
0
}
3957
3958
// ggml_soft_max
3959
3960
static struct ggml_tensor * ggml_soft_max_impl(
3961
        struct ggml_context * ctx,
3962
        struct ggml_tensor  * a,
3963
        struct ggml_tensor  * mask,
3964
        float                 scale,
3965
        float                 max_bias,
3966
0
        bool                  inplace) {
3967
0
    GGML_ASSERT(ggml_is_contiguous(a));
3968
3969
0
    if (mask) {
3970
0
        GGML_ASSERT(mask->type == GGML_TYPE_F16 || mask->type == GGML_TYPE_F32);
3971
0
        GGML_ASSERT(ggml_is_contiguous(mask));
3972
0
        GGML_ASSERT(mask->ne[0] == a->ne[0]);
3973
0
        GGML_ASSERT(mask->ne[1] >= a->ne[1]);
3974
0
        GGML_ASSERT(a->ne[2]%mask->ne[2] == 0);
3975
0
        GGML_ASSERT(a->ne[3]%mask->ne[3] == 0);
3976
0
    }
3977
3978
0
    if (max_bias > 0.0f) {
3979
0
        GGML_ASSERT(mask);
3980
0
    }
3981
3982
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
3983
3984
0
    float params[] = { scale, max_bias };
3985
0
    ggml_set_op_params(result, params, sizeof(params));
3986
3987
0
    result->op     = GGML_OP_SOFT_MAX;
3988
0
    result->src[0] = a;
3989
0
    result->src[1] = mask;
3990
3991
0
    return result;
3992
0
}
3993
3994
struct ggml_tensor * ggml_soft_max(
3995
        struct ggml_context * ctx,
3996
0
        struct ggml_tensor  * a) {
3997
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, false);
3998
0
}
3999
4000
struct ggml_tensor * ggml_soft_max_inplace(
4001
        struct ggml_context * ctx,
4002
0
        struct ggml_tensor  * a) {
4003
0
    return ggml_soft_max_impl(ctx, a, NULL, 1.0f, 0.0f, true);
4004
0
}
4005
4006
struct ggml_tensor * ggml_soft_max_ext(
4007
        struct ggml_context * ctx,
4008
        struct ggml_tensor  * a,
4009
        struct ggml_tensor  * mask,
4010
        float                 scale,
4011
0
        float                 max_bias) {
4012
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
4013
0
}
4014
4015
struct ggml_tensor * ggml_soft_max_ext_inplace(
4016
        struct ggml_context * ctx,
4017
        struct ggml_tensor  * a,
4018
        struct ggml_tensor  * mask,
4019
        float                 scale,
4020
0
        float                 max_bias) {
4021
0
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
4022
0
}
4023
4024
void ggml_soft_max_add_sinks(
4025
        struct ggml_tensor * a,
4026
0
        struct ggml_tensor * sinks) {
4027
0
    if (!sinks) {
4028
0
        a->src[2] = NULL;
4029
0
        return;
4030
0
    }
4031
4032
0
    GGML_ASSERT(a->op == GGML_OP_SOFT_MAX);
4033
0
    GGML_ASSERT(a->src[2] == NULL);
4034
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
4035
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
4036
4037
0
    a->src[2] = sinks;
4038
0
}
4039
4040
// ggml_soft_max_ext_back
4041
4042
static struct ggml_tensor * ggml_soft_max_ext_back_impl(
4043
        struct ggml_context * ctx,
4044
        struct ggml_tensor  * a,
4045
        struct ggml_tensor  * b,
4046
        float                 scale,
4047
        float                 max_bias,
4048
0
        bool                  inplace) {
4049
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4050
4051
0
    result->op     = GGML_OP_SOFT_MAX_BACK;
4052
0
    result->src[0] = a;
4053
0
    result->src[1] = b;
4054
4055
0
    memcpy((float *) result->op_params + 0, &scale,    sizeof(float));
4056
0
    memcpy((float *) result->op_params + 1, &max_bias, sizeof(float));
4057
4058
0
    return result;
4059
0
}
4060
4061
struct ggml_tensor * ggml_soft_max_ext_back(
4062
        struct ggml_context * ctx,
4063
        struct ggml_tensor  * a,
4064
        struct ggml_tensor  * b,
4065
        float                 scale,
4066
0
        float                 max_bias) {
4067
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, false);
4068
0
}
4069
4070
struct ggml_tensor * ggml_soft_max_ext_back_inplace(
4071
        struct ggml_context * ctx,
4072
        struct ggml_tensor  * a,
4073
        struct ggml_tensor  * b,
4074
        float                 scale,
4075
0
        float                 max_bias) {
4076
0
    return ggml_soft_max_ext_back_impl(ctx, a, b, scale, max_bias, true);
4077
0
}
4078
4079
// ggml_rope
4080
4081
static struct ggml_tensor * ggml_rope_impl(
4082
        struct ggml_context * ctx,
4083
        struct ggml_tensor  * a,
4084
        struct ggml_tensor  * b,
4085
        struct ggml_tensor  * c,
4086
        int                   n_dims,
4087
        int                   sections[GGML_MROPE_SECTIONS],
4088
        int                   mode,
4089
        int                   n_ctx_orig,
4090
        float                 freq_base,
4091
        float                 freq_scale,
4092
        float                 ext_factor,
4093
        float                 attn_factor,
4094
        float                 beta_fast,
4095
        float                 beta_slow,
4096
0
        bool                  inplace) {
4097
0
    GGML_ASSERT((mode & 1) == 0 && "mode & 1 == 1 is no longer supported");
4098
4099
0
    GGML_ASSERT(ggml_is_vector(b));
4100
0
    GGML_ASSERT(b->type == GGML_TYPE_I32);
4101
4102
0
    bool mrope_used = mode & GGML_ROPE_TYPE_MROPE;
4103
0
    if (mrope_used) {
4104
0
        GGML_ASSERT(a->ne[2] * 4 == b->ne[0]); // mrope expecting 4 position ids per token
4105
0
    } else {
4106
0
        GGML_ASSERT(a->ne[2] == b->ne[0]);
4107
0
    }
4108
4109
0
    if (c) {
4110
0
        GGML_ASSERT(c->type == GGML_TYPE_F32);
4111
0
        GGML_ASSERT(c->ne[0] >= n_dims / 2);
4112
0
    }
4113
4114
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
4115
4116
0
    int32_t params[15] = { /*n_past*/ 0, n_dims, mode, /*n_ctx*/ 0, n_ctx_orig };
4117
0
    memcpy(params +  5, &freq_base,    sizeof(float));
4118
0
    memcpy(params +  6, &freq_scale,   sizeof(float));
4119
0
    memcpy(params +  7, &ext_factor,   sizeof(float));
4120
0
    memcpy(params +  8, &attn_factor,  sizeof(float));
4121
0
    memcpy(params +  9, &beta_fast,    sizeof(float));
4122
0
    memcpy(params + 10, &beta_slow,    sizeof(float));
4123
0
    if (mrope_used && sections) {
4124
0
        memcpy(params + 11, sections,  sizeof(int32_t) * GGML_MROPE_SECTIONS);
4125
0
    } else {
4126
0
        memset(params + 11, 0,         sizeof(int32_t) * GGML_MROPE_SECTIONS);
4127
0
    }
4128
0
    ggml_set_op_params(result, params, sizeof(params));
4129
4130
0
    result->op     = GGML_OP_ROPE;
4131
0
    result->src[0] = a;
4132
0
    result->src[1] = b;
4133
0
    result->src[2] = c;
4134
4135
0
    return result;
4136
0
}
4137
4138
struct ggml_tensor * ggml_rope(
4139
        struct ggml_context * ctx,
4140
        struct ggml_tensor  * a,
4141
        struct ggml_tensor  * b,
4142
        int                   n_dims,
4143
0
        int                   mode) {
4144
0
    return ggml_rope_impl(
4145
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, false
4146
0
    );
4147
0
}
4148
4149
struct ggml_tensor * ggml_rope_multi(
4150
        struct ggml_context * ctx,
4151
        struct ggml_tensor  * a,
4152
        struct ggml_tensor  * b,
4153
        struct ggml_tensor  * c,
4154
        int                   n_dims,
4155
        int                   sections[GGML_MROPE_SECTIONS],
4156
        int                   mode,
4157
        int                   n_ctx_orig,
4158
        float                 freq_base,
4159
        float                 freq_scale,
4160
        float                 ext_factor,
4161
        float                 attn_factor,
4162
        float                 beta_fast,
4163
0
        float                 beta_slow) {
4164
0
    return ggml_rope_impl(
4165
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4166
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4167
0
    );
4168
0
}
4169
4170
struct ggml_tensor * ggml_rope_multi_inplace(
4171
        struct ggml_context * ctx,
4172
        struct ggml_tensor  * a,
4173
        struct ggml_tensor  * b,
4174
        struct ggml_tensor  * c,
4175
        int                   n_dims,
4176
        int                   sections[GGML_MROPE_SECTIONS],
4177
        int                   mode,
4178
        int                   n_ctx_orig,
4179
        float                 freq_base,
4180
        float                 freq_scale,
4181
        float                 ext_factor,
4182
        float                 attn_factor,
4183
        float                 beta_fast,
4184
0
        float                 beta_slow) {
4185
0
    return ggml_rope_impl(
4186
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale,
4187
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4188
0
    );
4189
0
}
4190
4191
struct ggml_tensor * ggml_rope_inplace(
4192
        struct ggml_context * ctx,
4193
        struct ggml_tensor  * a,
4194
        struct ggml_tensor  * b,
4195
        int                   n_dims,
4196
0
        int                   mode) {
4197
0
    return ggml_rope_impl(
4198
0
        ctx, a, b, NULL, n_dims, NULL, mode, 0, 10000.0f, 1.0f, 0.0f, 1.0f, 0.0f, 0.0f, true
4199
0
    );
4200
0
}
4201
4202
struct ggml_tensor * ggml_rope_ext(
4203
        struct ggml_context * ctx,
4204
        struct ggml_tensor  * a,
4205
        struct ggml_tensor  * b,
4206
        struct ggml_tensor  * c,
4207
        int                   n_dims,
4208
        int                   mode,
4209
        int                   n_ctx_orig,
4210
        float                 freq_base,
4211
        float                 freq_scale,
4212
        float                 ext_factor,
4213
        float                 attn_factor,
4214
        float                 beta_fast,
4215
0
        float                 beta_slow) {
4216
0
    return ggml_rope_impl(
4217
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4218
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4219
0
    );
4220
0
}
4221
4222
struct ggml_tensor * ggml_rope_ext_inplace(
4223
        struct ggml_context * ctx,
4224
        struct ggml_tensor  * a,
4225
        struct ggml_tensor  * b,
4226
        struct ggml_tensor  * c,
4227
        int                   n_dims,
4228
        int                   mode,
4229
        int                   n_ctx_orig,
4230
        float                 freq_base,
4231
        float                 freq_scale,
4232
        float                 ext_factor,
4233
        float                 attn_factor,
4234
        float                 beta_fast,
4235
0
        float                 beta_slow) {
4236
0
    return ggml_rope_impl(
4237
0
        ctx, a, b, c, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4238
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4239
0
    );
4240
0
}
4241
4242
struct ggml_tensor * ggml_rope_custom(
4243
        struct ggml_context * ctx,
4244
        struct ggml_tensor  * a,
4245
        struct ggml_tensor  * b,
4246
        int                   n_dims,
4247
        int                   mode,
4248
        int                   n_ctx_orig,
4249
        float                 freq_base,
4250
        float                 freq_scale,
4251
        float                 ext_factor,
4252
        float                 attn_factor,
4253
        float                 beta_fast,
4254
0
        float                 beta_slow) {
4255
0
    return ggml_rope_impl(
4256
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4257
0
        ext_factor, attn_factor, beta_fast, beta_slow, false
4258
0
    );
4259
0
}
4260
4261
struct ggml_tensor * ggml_rope_custom_inplace(
4262
        struct ggml_context * ctx,
4263
        struct ggml_tensor  * a,
4264
        struct ggml_tensor  * b,
4265
        int                   n_dims,
4266
        int                   mode,
4267
        int                   n_ctx_orig,
4268
        float                 freq_base,
4269
        float                 freq_scale,
4270
        float                 ext_factor,
4271
        float                 attn_factor,
4272
        float                 beta_fast,
4273
0
        float                 beta_slow) {
4274
0
    return ggml_rope_impl(
4275
0
        ctx, a, b, NULL, n_dims, NULL, mode, n_ctx_orig, freq_base, freq_scale,
4276
0
        ext_factor, attn_factor, beta_fast, beta_slow, true
4277
0
    );
4278
0
}
4279
4280
// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get
4281
// `corr_dim(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))`
4282
0
static float ggml_rope_yarn_corr_dim(int n_dims, int n_ctx_orig, float n_rot, float base) {
4283
0
    return n_dims * logf(n_ctx_orig / (n_rot * 2 * (float)M_PI)) / (2 * logf(base));
4284
0
}
4285
4286
void ggml_rope_yarn_corr_dims(
4287
    int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]
4288
0
) {
4289
    // start and end correction dims
4290
0
    float start = floorf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_fast, freq_base));
4291
0
    float end   =  ceilf(ggml_rope_yarn_corr_dim(n_dims, n_ctx_orig, beta_slow, freq_base));
4292
0
    dims[0] = MAX(0, start);
4293
0
    dims[1] = MIN(n_dims - 1, end);
4294
0
}
4295
4296
// ggml_rope_back
4297
4298
struct ggml_tensor * ggml_rope_ext_back(
4299
        struct ggml_context * ctx,
4300
        struct ggml_tensor  * a,
4301
        struct ggml_tensor  * b,
4302
        struct ggml_tensor  * c,
4303
        int                   n_dims,
4304
        int                   mode,
4305
        int                   n_ctx_orig,
4306
        float                 freq_base,
4307
        float                 freq_scale,
4308
        float                 ext_factor,
4309
        float                 attn_factor,
4310
        float                 beta_fast,
4311
0
        float                 beta_slow) {
4312
0
    struct ggml_tensor * result = ggml_rope_ext(
4313
0
        ctx, a, b, c, n_dims, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4314
0
    result->op = GGML_OP_ROPE_BACK;
4315
0
    return result;
4316
0
}
4317
4318
struct ggml_tensor * ggml_rope_multi_back(
4319
        struct ggml_context * ctx,
4320
        struct ggml_tensor  * a,
4321
        struct ggml_tensor  * b,
4322
        struct ggml_tensor  * c,
4323
        int                   n_dims,
4324
        int                   sections[4],
4325
        int                   mode,
4326
        int                   n_ctx_orig,
4327
        float                 freq_base,
4328
        float                 freq_scale,
4329
        float                 ext_factor,
4330
        float                 attn_factor,
4331
        float                 beta_fast,
4332
0
        float                 beta_slow) {
4333
0
    struct ggml_tensor * result = ggml_rope_multi(
4334
0
        ctx, a, b, c, n_dims, sections, mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
4335
0
    result->op = GGML_OP_ROPE_BACK;
4336
0
    return result;
4337
0
}
4338
// ggml_clamp
4339
4340
struct ggml_tensor * ggml_clamp(
4341
        struct ggml_context * ctx,
4342
        struct ggml_tensor  * a,
4343
        float                 min,
4344
0
        float                 max) {
4345
    // TODO: when implement backward, fix this:
4346
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
4347
4348
0
    float params[] = { min, max };
4349
0
    ggml_set_op_params(result, params, sizeof(params));
4350
4351
0
    result->op     = GGML_OP_CLAMP;
4352
0
    result->src[0] = a;
4353
4354
0
    return result;
4355
0
}
4356
4357
0
static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4358
0
    return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
4359
0
}
4360
4361
// im2col: [N, IC, IH, IW] => [N, OH, OW, IC*KH*KW]
4362
// a: [OC,IC, KH, KW]
4363
// b: [N, IC, IH, IW]
4364
// result: [N, OH, OW, IC*KH*KW]
4365
struct ggml_tensor * ggml_im2col(
4366
        struct ggml_context * ctx,
4367
        struct ggml_tensor  * a,
4368
        struct ggml_tensor  * b,
4369
        int                   s0,
4370
        int                   s1,
4371
        int                   p0,
4372
        int                   p1,
4373
        int                   d0,
4374
        int                   d1,
4375
        bool                  is_2D,
4376
0
        enum ggml_type        dst_type) {
4377
0
    if (is_2D) {
4378
0
        GGML_ASSERT(a->ne[2] == b->ne[2]);
4379
0
    } else {
4380
        //GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
4381
0
        GGML_ASSERT(b->ne[1] == a->ne[1]);
4382
0
        GGML_ASSERT(b->ne[3] == 1);
4383
0
    }
4384
4385
0
    const int64_t OH = is_2D ? ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1) : 0;
4386
0
    const int64_t OW =         ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4387
4388
0
    GGML_ASSERT((!is_2D || OH > 0) && "b too small compared to a");
4389
0
    GGML_ASSERT((OW > 0)           && "b too small compared to a");
4390
4391
0
    const int64_t ne[4] = {
4392
0
        is_2D ? (a->ne[2] * a->ne[1] * a->ne[0]) : a->ne[1] * a->ne[0],
4393
0
        OW,
4394
0
        is_2D ? OH : b->ne[2],
4395
0
        is_2D ?      b->ne[3] : 1,
4396
0
    };
4397
4398
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4399
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4400
0
    ggml_set_op_params(result, params, sizeof(params));
4401
4402
0
    result->op     = GGML_OP_IM2COL;
4403
0
    result->src[0] = a;
4404
0
    result->src[1] = b;
4405
4406
0
    return result;
4407
0
}
4408
4409
struct ggml_tensor * ggml_im2col_back(
4410
        struct ggml_context * ctx,
4411
        struct ggml_tensor  * a,
4412
        struct ggml_tensor  * b,
4413
        int64_t             * ne,
4414
        int                   s0,
4415
        int                   s1,
4416
        int                   p0,
4417
        int                   p1,
4418
        int                   d0,
4419
        int                   d1,
4420
0
        bool                  is_2D) {
4421
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4422
0
    int32_t params[] = { s0, s1, p0, p1, d0, d1, (is_2D ? 1 : 0) };
4423
0
    ggml_set_op_params(result, params, sizeof(params));
4424
4425
0
    result->op     = GGML_OP_IM2COL_BACK;
4426
0
    result->src[0] = a;
4427
0
    result->src[1] = b;
4428
4429
0
    return result;
4430
0
}
4431
4432
// ggml_conv_1d
4433
4434
struct ggml_tensor * ggml_conv_1d(
4435
        struct ggml_context * ctx,
4436
        struct ggml_tensor  * a,
4437
        struct ggml_tensor  * b,
4438
        int                   s0,
4439
        int                   p0,
4440
0
        int                   d0) {
4441
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16); // [N, OL, IC * K]
4442
4443
0
    struct ggml_tensor * result =
4444
0
        ggml_mul_mat(ctx,
4445
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], (im2col->ne[2] * im2col->ne[1])), // [N, OL, IC * K] => [N*OL, IC * K]
4446
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1]), a->ne[2]));                    // [OC,IC, K] => [OC, IC * K]
4447
4448
0
    result = ggml_reshape_3d(ctx, result, im2col->ne[1], a->ne[2], im2col->ne[2]); // [N, OC, OL]
4449
4450
0
    return result;
4451
0
}
4452
4453
// ggml_conv_1d_ph
4454
4455
struct ggml_tensor* ggml_conv_1d_ph(
4456
        struct ggml_context * ctx,
4457
        struct ggml_tensor  * a,
4458
        struct ggml_tensor  * b,
4459
        int                   s,
4460
0
        int                   d) {
4461
0
    return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
4462
0
}
4463
4464
// ggml_conv_1d_dw
4465
4466
struct ggml_tensor * ggml_conv_1d_dw(
4467
        struct ggml_context * ctx,
4468
        struct ggml_tensor  * a,
4469
        struct ggml_tensor  * b,
4470
        int                   s0,
4471
        int                   p0,
4472
0
        int                   d0) {
4473
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, b, b->ne[0], 1, b->ne[1], b->ne[2]);
4474
4475
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, new_b, s0, 0, p0, 0, d0, 0, false, GGML_TYPE_F16);
4476
4477
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, im2col, a);
4478
4479
0
    result = ggml_reshape_3d(ctx, result, result->ne[0], result->ne[2], 1);
4480
4481
0
    return result;
4482
0
}
4483
4484
// ggml_conv_1d_dw_ph
4485
4486
struct ggml_tensor * ggml_conv_1d_dw_ph(
4487
        struct ggml_context * ctx,
4488
        struct ggml_tensor  * a,
4489
        struct ggml_tensor  * b,
4490
        int                   s0,
4491
0
        int                   d0) {
4492
0
    return ggml_conv_1d_dw(ctx, a, b, s0, a->ne[0] / 2, d0);
4493
0
}
4494
4495
// ggml_conv_transpose_1d
4496
4497
0
static int64_t ggml_calc_conv_transpose_1d_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
4498
0
    return (ins - 1) * s - 2 * p + d * (ks - 1) + 1;
4499
0
}
4500
4501
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
4502
        struct ggml_context * ctx,
4503
        struct ggml_tensor  * a,
4504
        struct ggml_tensor  * b,
4505
        int                   s0,
4506
        int                   p0,
4507
0
        int                   d0) {
4508
0
    GGML_ASSERT(ggml_is_matrix(b));
4509
0
    GGML_ASSERT(a->ne[2] == b->ne[1]);
4510
0
    GGML_ASSERT(a->ne[3] == 1);
4511
4512
0
    GGML_ASSERT(p0 == 0);
4513
0
    GGML_ASSERT(d0 == 1);
4514
4515
0
    const int64_t ne[4] = {
4516
0
        ggml_calc_conv_transpose_1d_output_size(b->ne[0], a->ne[0], s0, 0 /*p0*/, 1 /*d0*/),
4517
0
        a->ne[1], b->ne[2], 1,
4518
0
    };
4519
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4520
4521
0
    int32_t params[] = { s0, p0, d0 };
4522
0
    ggml_set_op_params(result, params, sizeof(params));
4523
4524
0
    result->op     = GGML_OP_CONV_TRANSPOSE_1D;
4525
0
    result->src[0] = a;
4526
0
    result->src[1] = b;
4527
4528
0
    return result;
4529
0
}
4530
4531
// ggml_conv_2d
4532
4533
// a: [OC,IC, KH, KW]
4534
// b: [N, IC, IH, IW]
4535
// result: [N, OC, OH, OW]
4536
struct ggml_tensor * ggml_conv_2d(
4537
        struct ggml_context * ctx,
4538
        struct ggml_tensor  * a,
4539
        struct ggml_tensor  * b,
4540
        int                   s0,
4541
        int                   s1,
4542
        int                   p0,
4543
        int                   p1,
4544
        int                   d0,
4545
0
        int                   d1) {
4546
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, a, b, s0, s1, p0, p1, d0, d1, true, a->type); // [N, OH, OW, IC * KH * KW]
4547
4548
0
    struct ggml_tensor * result =
4549
0
        ggml_mul_mat(ctx,
4550
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0],  im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N, OH, OW, IC * KH * KW] => [N*OH*OW, IC * KH * KW]
4551
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2]),  a->ne[3]));                       // [OC,IC, KH, KW] => [OC, IC * KH * KW]
4552
4553
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], im2col->ne[3], a->ne[3]); // [OC, N, OH, OW]
4554
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OH, OW]
4555
4556
4557
0
    return result;
4558
0
}
4559
4560
// a: [OC*IC, KD, KH, KW]
4561
// b: [N*IC, ID, IH, IW]
4562
// result: [N*OD, OH, OW, IC * KD * KH * KW]
4563
struct ggml_tensor * ggml_im2col_3d(
4564
        struct ggml_context * ctx,
4565
        struct ggml_tensor  * a,
4566
        struct ggml_tensor  * b,
4567
        int64_t               IC,
4568
        int                   s0, // stride width
4569
        int                   s1, // stride height
4570
        int                   s2, // stride depth
4571
        int                   p0, // padding width
4572
        int                   p1, // padding height
4573
        int                   p2, // padding depth
4574
        int                   d0, // dilation width
4575
        int                   d1, // dilation height
4576
        int                   d2, // dilation depth
4577
0
        enum ggml_type        dst_type) {
4578
0
    const int64_t N = b->ne[3] / IC;
4579
0
    const int64_t ID = b->ne[2];
4580
0
    const int64_t IH = b->ne[1];
4581
0
    const int64_t IW = b->ne[0];
4582
4583
0
    const int64_t OC = a->ne[3] / IC;
4584
0
    UNUSED(OC);
4585
0
    const int64_t KD = a->ne[2];
4586
0
    const int64_t KH = a->ne[1];
4587
0
    const int64_t KW = a->ne[0];
4588
0
    const int64_t OD = ggml_calc_conv_output_size(ID, KD, s2, p2, d2);
4589
0
    const int64_t OH = ggml_calc_conv_output_size(IH, KH, s1, p1, d1);
4590
0
    const int64_t OW = ggml_calc_conv_output_size(IW, KW, s0, p0, d0);
4591
4592
0
    GGML_ASSERT((OD > 0)  && "b too small compared to a");
4593
0
    GGML_ASSERT((OH > 0)  && "b too small compared to a");
4594
0
    GGML_ASSERT((OW > 0)  && "b too small compared to a");
4595
4596
4597
0
    const int64_t ne[4] = {KW*KH*KD*IC, OW, OH, OD*N};
4598
4599
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, dst_type, 4, ne);
4600
0
    int32_t params[] = { s0, s1, s2, p0, p1, p2, d0, d1, d2, (int32_t)IC};
4601
0
    ggml_set_op_params(result, params, sizeof(params));
4602
4603
0
    result->op     = GGML_OP_IM2COL_3D;
4604
0
    result->src[0] = a;
4605
0
    result->src[1] = b;
4606
4607
0
    return result;
4608
0
}
4609
4610
// a: [OC*IC, KD, KH, KW]
4611
// b: [N*IC, ID, IH, IW]
4612
// result: [N*OC, OD, OH, OW]
4613
struct ggml_tensor * ggml_conv_3d(
4614
        struct ggml_context * ctx,
4615
        struct ggml_tensor  * a,
4616
        struct ggml_tensor  * b,
4617
        int64_t               IC,
4618
        int                   s0, // stride width
4619
        int                   s1, // stride height
4620
        int                   s2, // stride depth
4621
        int                   p0, // padding width
4622
        int                   p1, // padding height
4623
        int                   p2, // padding depth
4624
        int                   d0, // dilation width
4625
        int                   d1, // dilation height
4626
        int                   d2  // dilation depth
4627
0
        ) {
4628
0
    struct ggml_tensor * im2col = ggml_im2col_3d(ctx, a, b, IC, s0, s1, s2, p0, p1, p2, d0, d1, d2, a->type); // [N*OD, OH, OW, IC * KD * KH * KW]
4629
4630
0
    int64_t OC = a->ne[3] / IC;
4631
0
    int64_t N = b->ne[3] / IC;
4632
0
    struct ggml_tensor * result =
4633
0
        ggml_mul_mat(ctx,
4634
0
                ggml_reshape_2d(ctx, im2col, im2col->ne[0], im2col->ne[3] * im2col->ne[2] * im2col->ne[1]), // [N*OD, OH, OW, IC * KD * KH * KW] => [N*OD*OH*OW, IC * KD * KH * KW]
4635
0
                ggml_reshape_2d(ctx, a, (a->ne[0] * a->ne[1] * a->ne[2] * IC), OC));                          // [OC*IC, KD, KH, KW] => [OC, IC * KD * KH * KW]
4636
4637
0
    int64_t OD = im2col->ne[3] / N;
4638
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1]*im2col->ne[2], OD, N, OC); // [OC, N*OD*OH*OW] => [OC, N, OD, OH*OW]
4639
0
    result = ggml_cont(ctx, ggml_permute(ctx, result, 0, 1, 3, 2)); // [N, OC, OD, OH*OW]
4640
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], OD, OC * N); // [N*OC, OD, OH, OW]
4641
4642
0
    return result;
4643
0
}
4644
4645
// ggml_conv_2d_sk_p0
4646
4647
struct ggml_tensor * ggml_conv_2d_sk_p0(
4648
        struct ggml_context * ctx,
4649
        struct ggml_tensor  * a,
4650
0
        struct ggml_tensor  * b) {
4651
0
    return ggml_conv_2d(ctx, a, b, a->ne[0], a->ne[1], 0, 0, 1, 1);
4652
0
}
4653
4654
// ggml_conv_2d_s1_ph
4655
4656
struct ggml_tensor * ggml_conv_2d_s1_ph(
4657
        struct ggml_context * ctx,
4658
        struct ggml_tensor  * a,
4659
0
        struct ggml_tensor  * b) {
4660
0
    return ggml_conv_2d(ctx, a, b, 1, 1, a->ne[0] / 2, a->ne[1] / 2, 1, 1);
4661
0
}
4662
4663
// ggml_conv_2d_dw
4664
4665
struct ggml_tensor * ggml_conv_2d_dw(
4666
        struct ggml_context * ctx,
4667
        struct ggml_tensor  * a,
4668
        struct ggml_tensor  * b,
4669
        int                   s0,
4670
        int                   s1,
4671
        int                   p0,
4672
        int                   p1,
4673
        int                   d0,
4674
0
        int                   d1) {
4675
0
    struct ggml_tensor * new_a = ggml_reshape_4d(ctx, a, a->ne[0], a->ne[1], 1, a->ne[2] * a->ne[3]);
4676
0
    struct ggml_tensor * im2col = ggml_im2col(ctx, new_a,
4677
0
                                        ggml_reshape_4d(ctx, b, b->ne[0], b->ne[1], 1, b->ne[2] * b->ne[3]),
4678
0
                                        s0, s1, p0, p1, d0, d1, true, GGML_TYPE_F16); // [N * IC, OH, OW, KH * KW]
4679
0
    struct ggml_tensor * new_b = ggml_reshape_4d(ctx, im2col, im2col->ne[0], im2col->ne[2] * im2col->ne[1], b->ne[2], b->ne[3]); // [N * IC, OH, OW, KH * KW] => [N, IC, OH * OW, KH * KW]
4680
4681
0
    new_a = ggml_reshape_4d(ctx, new_a, (new_a->ne[0] * new_a->ne[1]), new_a->ne[2],  new_a->ne[3], 1);                       // [OC,1, KH, KW] => [1, OC, 1, KH * KW]
4682
0
    struct ggml_tensor * result = ggml_mul_mat(ctx, new_a, new_b);
4683
0
    result = ggml_reshape_4d(ctx, result, im2col->ne[1], im2col->ne[2], b->ne[2], b->ne[3]); // [N, OC, OH, OW]
4684
4685
0
    return result;
4686
0
}
4687
4688
// ggml_conv_2d_dw_direct
4689
4690
struct ggml_tensor * ggml_conv_2d_dw_direct(
4691
        struct ggml_context * ctx,
4692
        struct ggml_tensor  * a,
4693
        struct ggml_tensor  * b,
4694
        int                   stride0,
4695
        int                   stride1,
4696
        int                   pad0,
4697
        int                   pad1,
4698
        int                   dilation0,
4699
0
        int                   dilation1) {
4700
0
    GGML_ASSERT(a->ne[2] == 1);
4701
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4702
0
    int64_t ne[4];
4703
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], stride0, pad0, dilation0);
4704
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], stride1, pad1, dilation1);
4705
0
    ne[2] = b->ne[2];
4706
0
    ne[3] = b->ne[3];
4707
4708
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4709
4710
0
    if (ggml_is_contiguous_channels(b)) {
4711
        // Result will be permuted the same way as input (CWHN order)
4712
0
        const int64_t type_size = ggml_type_size(result->type);
4713
0
        GGML_ASSERT(ggml_blck_size(result->type) == 1);
4714
0
        result->nb[0] = result->ne[2] * type_size;
4715
0
        result->nb[1] = result->ne[0] * result->nb[0];
4716
0
        result->nb[2] = type_size;
4717
0
    }
4718
4719
0
    int32_t params[] = { stride0, stride1, pad0, pad1, dilation0, dilation1 };
4720
0
    ggml_set_op_params(result, params, sizeof(params));
4721
4722
0
    result->op     = GGML_OP_CONV_2D_DW;
4723
0
    result->src[0] = a;
4724
0
    result->src[1] = b;
4725
0
    return result;
4726
0
}
4727
4728
// ggml_conv_2d_direct
4729
4730
struct ggml_tensor * ggml_conv_2d_direct(
4731
        struct ggml_context * ctx,
4732
        struct ggml_tensor  * a,   // convolution kernel [KW, KH, IC, OC]
4733
        struct ggml_tensor  * b,   // input data [W, H, C, N]
4734
        int                   s0,  // stride dimension 0
4735
        int                   s1,  // stride dimension 1
4736
        int                   p0,  // padding dimension 0
4737
        int                   p1,  // padding dimension 1
4738
        int                   d0,  // dilation dimension 0
4739
0
        int                   d1) {// dilation dimension 1
4740
4741
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
4742
    //GGML_ASSERT(a->type == b->type);
4743
4744
0
    int64_t ne[4];
4745
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4746
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4747
0
    ne[2] = a->ne[3];
4748
0
    ne[3] = b->ne[3];
4749
4750
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne);
4751
4752
0
    ggml_set_op_params_i32(result, 0, s0);
4753
0
    ggml_set_op_params_i32(result, 1, s1);
4754
0
    ggml_set_op_params_i32(result, 2, p0);
4755
0
    ggml_set_op_params_i32(result, 3, p1);
4756
0
    ggml_set_op_params_i32(result, 4, d0);
4757
0
    ggml_set_op_params_i32(result, 5, d1);
4758
4759
0
    result->op = GGML_OP_CONV_2D;
4760
0
    result->src[0] = a;
4761
0
    result->src[1] = b;
4762
4763
0
    return result;
4764
0
}
4765
4766
// ggml_conv_3d_direct
4767
4768
struct ggml_tensor * ggml_conv_3d_direct(
4769
        struct ggml_context * ctx,
4770
        struct ggml_tensor  * a,
4771
        struct ggml_tensor  * b,
4772
        int                   s0,
4773
        int                   s1,
4774
        int                   s2,
4775
        int                   p0,
4776
        int                   p1,
4777
        int                   p2,
4778
        int                   d0,
4779
        int                   d1,
4780
        int                   d2,
4781
        int                   c,
4782
        int                   n,
4783
0
        int                   oc) {
4784
4785
0
    GGML_ASSERT(a->ne[3] == (int64_t) c * oc);
4786
0
    GGML_ASSERT(b->ne[3] == (int64_t) c * n);
4787
4788
0
    int64_t ne[4];
4789
0
    ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0);
4790
0
    ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1);
4791
0
    ne[2] = ggml_calc_conv_output_size(b->ne[2], a->ne[2], s2, p2, d2);
4792
0
    ne[3] = (int64_t) oc * n;
4793
4794
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4795
4796
0
    ggml_set_op_params_i32(result, 0,  s0);
4797
0
    ggml_set_op_params_i32(result, 1,  s1);
4798
0
    ggml_set_op_params_i32(result, 2,  s2);
4799
0
    ggml_set_op_params_i32(result, 3,  p0);
4800
0
    ggml_set_op_params_i32(result, 4,  p1);
4801
0
    ggml_set_op_params_i32(result, 5,  p2);
4802
0
    ggml_set_op_params_i32(result, 6,  d0);
4803
0
    ggml_set_op_params_i32(result, 7,  d1);
4804
0
    ggml_set_op_params_i32(result, 8,  d2);
4805
0
    ggml_set_op_params_i32(result, 9,  c);
4806
0
    ggml_set_op_params_i32(result, 10, n);
4807
0
    ggml_set_op_params_i32(result, 11, oc);
4808
4809
0
    result->op = GGML_OP_CONV_3D;
4810
0
    result->src[0] = a;
4811
0
    result->src[1] = b;
4812
4813
0
    return result;
4814
0
}
4815
4816
// ggml_conv_transpose_2d_p0
4817
4818
0
static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) {
4819
0
    return (ins - 1) * s - 2 * p + ks;
4820
0
}
4821
4822
struct ggml_tensor * ggml_conv_transpose_2d_p0(
4823
        struct ggml_context * ctx,
4824
        struct ggml_tensor  * a,
4825
        struct ggml_tensor  * b,
4826
0
        int                   stride) {
4827
0
    GGML_ASSERT(a->ne[3] == b->ne[2]);
4828
4829
0
    const int64_t ne[4] = {
4830
0
        ggml_calc_conv_transpose_output_size(b->ne[0], a->ne[0], stride, 0 /*p0*/),
4831
0
        ggml_calc_conv_transpose_output_size(b->ne[1], a->ne[1], stride, 0 /*p1*/),
4832
0
        a->ne[2], b->ne[3],
4833
0
    };
4834
4835
0
    struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4836
4837
0
    ggml_set_op_params_i32(result, 0, stride);
4838
4839
0
    result->op     = GGML_OP_CONV_TRANSPOSE_2D;
4840
0
    result->src[0] = a;
4841
0
    result->src[1] = b;
4842
4843
0
    return result;
4844
0
}
4845
4846
// ggml_pool_*
4847
4848
0
static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, float p) {
4849
0
    return (ins + 2 * p - ks) / s + 1;
4850
0
}
4851
4852
// ggml_pool_1d
4853
4854
struct ggml_tensor * ggml_pool_1d(
4855
        struct ggml_context * ctx,
4856
        struct ggml_tensor  * a,
4857
        enum ggml_op_pool     op,
4858
        int                   k0,
4859
        int                   s0,
4860
0
        int                   p0) {
4861
0
    const int64_t ne[4] = {
4862
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4863
0
        a->ne[1],
4864
0
        a->ne[2],
4865
0
        a->ne[3],
4866
0
    };
4867
0
    GGML_ASSERT(ne[0] > 0);
4868
4869
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4870
4871
0
    int32_t params[] = { op, k0, s0, p0 };
4872
0
    ggml_set_op_params(result, params, sizeof(params));
4873
4874
0
    result->op     = GGML_OP_POOL_1D;
4875
0
    result->src[0] = a;
4876
4877
0
    return result;
4878
0
}
4879
4880
// ggml_pool_2d
4881
4882
struct ggml_tensor * ggml_pool_2d(
4883
        struct ggml_context * ctx,
4884
        struct ggml_tensor  * a,
4885
        enum ggml_op_pool     op,
4886
        int                   k0,
4887
        int                   k1,
4888
        int                   s0,
4889
        int                   s1,
4890
        float                 p0,
4891
0
        float                 p1) {
4892
0
    struct ggml_tensor * result;
4893
0
    const int64_t ne[4] = {
4894
0
        ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
4895
0
        ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
4896
0
        a->ne[2],
4897
0
        a->ne[3],
4898
0
    };
4899
0
    GGML_ASSERT(ne[0] > 0);
4900
0
    GGML_ASSERT(ne[1] > 0);
4901
4902
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
4903
4904
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4905
0
    ggml_set_op_params(result, params, sizeof(params));
4906
4907
0
    result->op     = GGML_OP_POOL_2D;
4908
0
    result->src[0] = a;
4909
4910
0
    return result;
4911
0
}
4912
4913
struct ggml_tensor * ggml_pool_2d_back(
4914
        struct ggml_context * ctx,
4915
        struct ggml_tensor  * a,
4916
        struct ggml_tensor  * af,
4917
        enum ggml_op_pool     op,
4918
        int                   k0,
4919
        int                   k1,
4920
        int                   s0,
4921
        int                   s1,
4922
        float                 p0,
4923
0
        float                 p1) {
4924
0
    struct ggml_tensor * result;
4925
0
    result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, af->ne);
4926
4927
0
    int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
4928
0
    ggml_set_op_params(result, params, sizeof(params));
4929
4930
0
    result->op     = GGML_OP_POOL_2D_BACK;
4931
0
    result->src[0] = a;
4932
0
    result->src[1] = af;
4933
4934
0
    return result;
4935
0
}
4936
4937
// ggml_upscale / ggml_interpolate
4938
4939
static struct ggml_tensor * ggml_interpolate_impl(
4940
        struct ggml_context * ctx,
4941
        struct ggml_tensor  * a,
4942
        int64_t               ne0,
4943
        int64_t               ne1,
4944
        int64_t               ne2,
4945
        int64_t               ne3,
4946
0
        uint32_t              mode) {
4947
0
    GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT);
4948
    // TODO: implement antialias for modes other than bilinear
4949
0
    GGML_ASSERT(!(mode & GGML_SCALE_FLAG_ANTIALIAS) || (mode & 0xFF) == GGML_SCALE_MODE_BILINEAR);
4950
4951
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3);
4952
4953
0
    ggml_set_op_params_i32(result, 0, (int32_t)mode);
4954
4955
0
    result->op     = GGML_OP_UPSCALE;
4956
0
    result->src[0] = a;
4957
4958
0
    return result;
4959
0
}
4960
4961
struct ggml_tensor * ggml_upscale(
4962
        struct ggml_context * ctx,
4963
        struct ggml_tensor  * a,
4964
        int                   scale_factor,
4965
0
        enum ggml_scale_mode  mode) {
4966
0
    GGML_ASSERT(scale_factor > 1);
4967
0
    return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode);
4968
0
}
4969
4970
struct ggml_tensor * ggml_upscale_ext(
4971
        struct ggml_context * ctx,
4972
        struct ggml_tensor  * a,
4973
        int                   ne0,
4974
        int                   ne1,
4975
        int                   ne2,
4976
        int                   ne3,
4977
0
        enum ggml_scale_mode  mode) {
4978
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4979
0
}
4980
4981
struct ggml_tensor * ggml_interpolate(
4982
        struct ggml_context * ctx,
4983
        struct ggml_tensor  * a,
4984
        int64_t               ne0,
4985
        int64_t               ne1,
4986
        int64_t               ne2,
4987
        int64_t               ne3,
4988
0
        uint32_t              mode) {
4989
0
    return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode);
4990
0
}
4991
4992
// ggml_pad
4993
4994
struct ggml_tensor * ggml_pad(
4995
        struct ggml_context * ctx,
4996
        struct ggml_tensor  * a,
4997
        int                   p0,
4998
        int                   p1,
4999
        int                   p2,
5000
0
        int                   p3) {
5001
0
    return ggml_pad_ext(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5002
0
}
5003
5004
// ggml_pad_circular
5005
5006
struct ggml_tensor * ggml_pad_circular(
5007
        struct ggml_context * ctx,
5008
        struct ggml_tensor  * a,
5009
        int                   p0,
5010
        int                   p1,
5011
        int                   p2,
5012
0
        int                   p3) {
5013
0
    return ggml_pad_ext_circular(ctx, a, 0, p0, 0, p1, 0, p2, 0, p3);
5014
0
}
5015
5016
struct ggml_tensor * ggml_pad_ext(
5017
            struct ggml_context * ctx,
5018
            struct ggml_tensor  * a,
5019
            int                  lp0,
5020
            int                  rp0,
5021
            int                  lp1,
5022
            int                  rp1,
5023
            int                  lp2,
5024
            int                  rp2,
5025
            int                  lp3,
5026
            int                  rp3
5027
0
            ) {
5028
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5029
0
            a->ne[0] + lp0 + rp0,
5030
0
            a->ne[1] + lp1 + rp1,
5031
0
            a->ne[2] + lp2 + rp2,
5032
0
            a->ne[3] + lp3 + rp3);
5033
5034
0
    ggml_set_op_params_i32(result, 0, lp0);
5035
0
    ggml_set_op_params_i32(result, 1, rp0);
5036
0
    ggml_set_op_params_i32(result, 2, lp1);
5037
0
    ggml_set_op_params_i32(result, 3, rp1);
5038
0
    ggml_set_op_params_i32(result, 4, lp2);
5039
0
    ggml_set_op_params_i32(result, 5, rp2);
5040
0
    ggml_set_op_params_i32(result, 6, lp3);
5041
0
    ggml_set_op_params_i32(result, 7, rp3);
5042
0
    ggml_set_op_params_i32(result, 8, 0); // not circular by default
5043
5044
5045
0
    result->op     = GGML_OP_PAD;
5046
0
    result->src[0] = a;
5047
5048
0
    return result;
5049
0
}
5050
5051
// ggml_pad_ext_circular
5052
5053
struct ggml_tensor * ggml_pad_ext_circular(
5054
        struct ggml_context * ctx,
5055
        struct ggml_tensor  * a,
5056
        int                  lp0,
5057
        int                  rp0,
5058
        int                  lp1,
5059
        int                  rp1,
5060
        int                  lp2,
5061
        int                  rp2,
5062
        int                  lp3,
5063
        int                  rp3
5064
0
        ) {
5065
0
    struct ggml_tensor * result = ggml_pad_ext(ctx, a, lp0, rp0, lp1, rp1, lp2, rp2, lp3, rp3);
5066
0
    ggml_set_op_params_i32(result, 8, 1); // circular
5067
0
    return result;
5068
0
}
5069
5070
// ggml_pad_reflect_1d
5071
5072
struct ggml_tensor * ggml_pad_reflect_1d(
5073
        struct ggml_context * ctx,
5074
        struct ggml_tensor  * a,
5075
        int                   p0,
5076
0
        int                   p1) {
5077
0
    GGML_ASSERT(p0 >= 0);
5078
0
    GGML_ASSERT(p1 >= 0);
5079
5080
0
    GGML_ASSERT(p0 < a->ne[0]); // padding length on each size must be less than the
5081
0
    GGML_ASSERT(p1 < a->ne[0]); // existing length of the dimension being padded
5082
5083
0
    GGML_ASSERT(ggml_is_contiguous(a));
5084
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5085
5086
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type,
5087
0
            a->ne[0] + p0 + p1,
5088
0
            a->ne[1],
5089
0
            a->ne[2],
5090
0
            a->ne[3]);
5091
5092
0
    int32_t params[] = { p0, p1 };
5093
0
    ggml_set_op_params(result, params, sizeof(params));
5094
5095
0
    result->op     = GGML_OP_PAD_REFLECT_1D;
5096
0
    result->src[0] = a;
5097
5098
0
    return result;
5099
0
}
5100
5101
// ggml_roll
5102
5103
struct ggml_tensor * ggml_roll(
5104
        struct ggml_context * ctx,
5105
        struct ggml_tensor  * a,
5106
        int                   shift0,
5107
        int                   shift1,
5108
        int                   shift2,
5109
0
        int                   shift3) {
5110
0
    GGML_ASSERT(a->nb[0] == ggml_type_size(a->type));
5111
0
    GGML_ASSERT(abs(shift0) < a->ne[0]);
5112
0
    GGML_ASSERT(abs(shift1) < a->ne[1]);
5113
0
    GGML_ASSERT(abs(shift2) < a->ne[2]);
5114
0
    GGML_ASSERT(abs(shift3) < a->ne[3]);
5115
5116
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5117
5118
0
    ggml_set_op_params_i32(result, 0, shift0);
5119
0
    ggml_set_op_params_i32(result, 1, shift1);
5120
0
    ggml_set_op_params_i32(result, 2, shift2);
5121
0
    ggml_set_op_params_i32(result, 3, shift3);
5122
5123
0
    result->op     = GGML_OP_ROLL;
5124
0
    result->src[0] = a;
5125
5126
0
    return result;
5127
0
}
5128
5129
// ggml_timestep_embedding
5130
5131
struct ggml_tensor * ggml_timestep_embedding(
5132
        struct ggml_context * ctx,
5133
        struct ggml_tensor  * timesteps,
5134
        int                   dim,
5135
0
        int                   max_period) {
5136
5137
0
    struct ggml_tensor * result = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, dim, timesteps->ne[0]);
5138
5139
0
    ggml_set_op_params_i32(result, 0, dim);
5140
0
    ggml_set_op_params_i32(result, 1, max_period);
5141
5142
0
    result->op     = GGML_OP_TIMESTEP_EMBEDDING;
5143
0
    result->src[0] = timesteps;
5144
5145
0
    return result;
5146
0
}
5147
5148
// ggml_tri
5149
5150
struct ggml_tensor * ggml_tri(
5151
    struct ggml_context * ctx,
5152
    struct ggml_tensor  * a,
5153
0
    enum ggml_tri_type    type) {
5154
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5155
5156
0
    GGML_ASSERT(ggml_is_contiguous(a));
5157
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
5158
5159
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
5160
5161
0
    ggml_set_op_params_i32(result, 0, type);
5162
5163
0
    result->op = GGML_OP_TRI;
5164
0
    result->src[0] = a;
5165
5166
0
    return result;
5167
0
}
5168
5169
// ggml_fill
5170
5171
static struct ggml_tensor * ggml_fill_impl(
5172
    struct ggml_context * ctx,
5173
    struct ggml_tensor  * a,
5174
    float                 c,
5175
0
    bool                  inplace) {
5176
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5177
0
    GGML_ASSERT(ggml_is_contiguous(a));
5178
5179
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5180
5181
0
    ggml_set_op_params_f32(result, 0, c);
5182
5183
0
    result->op = GGML_OP_FILL;
5184
0
    result->src[0] = a;
5185
5186
0
    return result;
5187
0
}
5188
5189
struct ggml_tensor * ggml_fill(
5190
    struct ggml_context * ctx,
5191
    struct ggml_tensor  * a,
5192
0
    float                 c) {
5193
0
    return ggml_fill_impl(ctx, a, c, false);
5194
0
}
5195
5196
struct ggml_tensor * ggml_fill_inplace(
5197
    struct ggml_context * ctx,
5198
    struct ggml_tensor  * a,
5199
0
    float                 c) {
5200
0
    return ggml_fill_impl(ctx, a, c, true);
5201
0
}
5202
5203
// ggml_argsort
5204
5205
struct ggml_tensor * ggml_argsort(
5206
        struct ggml_context  * ctx,
5207
        struct ggml_tensor   * a,
5208
0
        enum ggml_sort_order   order) {
5209
0
    GGML_ASSERT(a->ne[0] <= INT32_MAX);
5210
5211
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, GGML_MAX_DIMS, a->ne);
5212
5213
0
    ggml_set_op_params_i32(result, 0, (int32_t) order);
5214
5215
0
    result->op     = GGML_OP_ARGSORT;
5216
0
    result->src[0] = a;
5217
5218
0
    return result;
5219
0
}
5220
5221
// ggml_argsort_top_k
5222
5223
struct ggml_tensor * ggml_argsort_top_k(
5224
        struct ggml_context * ctx,
5225
        struct ggml_tensor  * a,
5226
0
        int                   k) {
5227
0
    GGML_ASSERT(a->ne[0] >= k);
5228
5229
0
    struct ggml_tensor * result = ggml_argsort(ctx, a, GGML_SORT_ORDER_DESC);
5230
5231
0
    result = ggml_view_4d(ctx, result,
5232
0
                k, result->ne[1], result->ne[2], result->ne[3],
5233
0
                   result->nb[1], result->nb[2], result->nb[3],
5234
0
                0);
5235
5236
0
    return result;
5237
0
}
5238
5239
// ggml_top_k
5240
5241
struct ggml_tensor * ggml_top_k(
5242
        struct ggml_context * ctx,
5243
        struct ggml_tensor  * a,
5244
0
        int                   k) {
5245
0
    GGML_ASSERT(a->ne[0] >= k);
5246
5247
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_I32, k, a->ne[1], a->ne[2], a->ne[3]);
5248
5249
0
    result->op     = GGML_OP_TOP_K;
5250
0
    result->src[0] = a;
5251
5252
0
    return result;
5253
0
}
5254
5255
// ggml_arange
5256
5257
struct ggml_tensor * ggml_arange(
5258
        struct ggml_context * ctx,
5259
        float                 start,
5260
        float                 stop,
5261
0
        float                 step) {
5262
0
    GGML_ASSERT(stop > start);
5263
5264
0
    const int64_t steps = (int64_t) ceilf((stop - start) / step);
5265
5266
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, steps);
5267
5268
0
    ggml_set_op_params_f32(result, 0, start);
5269
0
    ggml_set_op_params_f32(result, 1, stop);
5270
0
    ggml_set_op_params_f32(result, 2, step);
5271
5272
0
    result->op = GGML_OP_ARANGE;
5273
5274
0
    return result;
5275
0
}
5276
5277
// ggml_flash_attn_ext
5278
5279
struct ggml_tensor * ggml_flash_attn_ext(
5280
        struct ggml_context * ctx,
5281
        struct ggml_tensor  * q,
5282
        struct ggml_tensor  * k,
5283
        struct ggml_tensor  * v,
5284
        struct ggml_tensor  * mask,
5285
        float                 scale,
5286
        float                 max_bias,
5287
0
        float                 logit_softcap) {
5288
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5289
    // TODO: check if vT can be multiplied by (k*qT)
5290
5291
0
    GGML_ASSERT(q->ne[3] == k->ne[3]);
5292
0
    GGML_ASSERT(q->ne[3] == v->ne[3]);
5293
5294
0
    if (mask) {
5295
0
        GGML_ASSERT(ggml_is_contiguous(mask));
5296
        //GGML_ASSERT(ggml_can_repeat_rows(mask, qk));
5297
5298
0
        GGML_ASSERT(q->ne[2] % mask->ne[2] == 0);
5299
0
        GGML_ASSERT(q->ne[3] % mask->ne[3] == 0);
5300
0
    }
5301
5302
0
    if (max_bias > 0.0f) {
5303
0
        GGML_ASSERT(mask);
5304
0
    }
5305
5306
    // permute(0, 2, 1, 3)
5307
0
    int64_t ne[4] = { v->ne[0], q->ne[2], q->ne[1], q->ne[3] };
5308
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5309
5310
0
    float params[] = { scale, max_bias, logit_softcap };
5311
0
    ggml_set_op_params(result, params, sizeof(params));
5312
5313
0
    result->op     = GGML_OP_FLASH_ATTN_EXT;
5314
0
    result->src[0] = q;
5315
0
    result->src[1] = k;
5316
0
    result->src[2] = v;
5317
0
    result->src[3] = mask;
5318
5319
0
    return result;
5320
0
}
5321
5322
void ggml_flash_attn_ext_set_prec(
5323
        struct ggml_tensor * a,
5324
0
        enum ggml_prec       prec) {
5325
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5326
5327
0
    const int32_t prec_i32 = (int32_t) prec;
5328
5329
0
    ggml_set_op_params_i32(a, 3, prec_i32); // scale is on first pos, max_bias on second
5330
0
}
5331
5332
enum ggml_prec ggml_flash_attn_ext_get_prec(
5333
0
        const struct ggml_tensor * a) {
5334
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5335
5336
0
    const int32_t prec_i32 = ggml_get_op_params_i32(a, 3);
5337
5338
0
    return (enum ggml_prec) prec_i32;
5339
0
}
5340
5341
void ggml_flash_attn_ext_add_sinks(
5342
        struct ggml_tensor * a,
5343
0
        struct ggml_tensor * sinks) {
5344
0
    if (!sinks) {
5345
0
        a->src[4] = NULL;
5346
0
        return;
5347
0
    }
5348
5349
0
    GGML_ASSERT(a->op == GGML_OP_FLASH_ATTN_EXT);
5350
0
    GGML_ASSERT(a->src[4] == NULL);
5351
0
    GGML_ASSERT(a->src[0]->ne[2] == sinks->ne[0]);
5352
0
    GGML_ASSERT(sinks->type == GGML_TYPE_F32);
5353
5354
0
    a->src[4] = sinks;
5355
0
}
5356
5357
// ggml_flash_attn_back
5358
5359
struct ggml_tensor * ggml_flash_attn_back(
5360
        struct ggml_context * ctx,
5361
        struct ggml_tensor  * q,
5362
        struct ggml_tensor  * k,
5363
        struct ggml_tensor  * v,
5364
        struct ggml_tensor  * d,
5365
0
        bool                  masked) {
5366
0
    GGML_ABORT("TODO: adapt to ggml_flash_attn_ext() changes");
5367
5368
0
    GGML_ASSERT(ggml_can_mul_mat(k, q));
5369
    // TODO: check if vT can be multiplied by (k*qT)
5370
5371
    // d shape [D,N,ne2,ne3]
5372
    // q shape [D,N,ne2,ne3]
5373
    // k shape [D,M,kvne2,ne3]
5374
    // v shape [M,D,kvne2,ne3]
5375
5376
0
    const int64_t     D = q->ne[0];
5377
0
    const int64_t     N = q->ne[1];
5378
0
    const int64_t     M = k->ne[1];
5379
0
    const int64_t   ne2 = q->ne[2];
5380
0
    const int64_t   ne3 = q->ne[3];
5381
0
    const int64_t kvne2 = k->ne[2];
5382
5383
0
    GGML_ASSERT(k->ne[0] == D);
5384
0
    GGML_ASSERT(v->ne[0] == M);
5385
0
    GGML_ASSERT(v->ne[1] == D);
5386
0
    GGML_ASSERT(d->ne[0] == D);
5387
0
    GGML_ASSERT(d->ne[1] == N);
5388
0
    GGML_ASSERT(k->ne[2] == kvne2);
5389
0
    GGML_ASSERT(k->ne[3] == ne3);
5390
0
    GGML_ASSERT(v->ne[2] == kvne2);
5391
0
    GGML_ASSERT(v->ne[3] == ne3);
5392
0
    GGML_ASSERT(d->ne[2] == ne2);
5393
0
    GGML_ASSERT(d->ne[3] == ne3);
5394
5395
0
    GGML_ASSERT(ne2 % kvne2 == 0);
5396
5397
    // store gradients of q, k and v as continuous tensors concatenated in result.
5398
    // note: v and gradv are actually transposed, i.e. v->ne[0] != D.
5399
0
    const int64_t elem_q = ggml_nelements(q);
5400
0
    const int64_t elem_k = ggml_nelements(k);
5401
0
    const int64_t elem_v = ggml_nelements(v);
5402
5403
0
    enum ggml_type result_type = GGML_TYPE_F32;
5404
0
    GGML_ASSERT(ggml_blck_size(result_type) == 1);
5405
0
    const size_t tsize = ggml_type_size(result_type);
5406
5407
0
    const size_t offs_q = 0;
5408
0
    const size_t offs_k = offs_q + GGML_PAD(elem_q * tsize, GGML_MEM_ALIGN);
5409
0
    const size_t offs_v = offs_k + GGML_PAD(elem_k * tsize, GGML_MEM_ALIGN);
5410
0
    const size_t end    = offs_v + GGML_PAD(elem_v * tsize, GGML_MEM_ALIGN);
5411
5412
0
    const size_t nelements = (end + tsize - 1)/tsize;
5413
5414
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, nelements);
5415
5416
0
    int32_t masked_i = masked ? 1 : 0;
5417
0
    ggml_set_op_params(result, &masked_i, sizeof(masked_i));
5418
5419
0
    result->op     = GGML_OP_FLASH_ATTN_BACK;
5420
0
    result->src[0] = q;
5421
0
    result->src[1] = k;
5422
0
    result->src[2] = v;
5423
0
    result->src[3] = d;
5424
5425
0
    return result;
5426
0
}
5427
5428
// ggml_ssm_conv
5429
5430
struct ggml_tensor * ggml_ssm_conv(
5431
        struct ggml_context * ctx,
5432
        struct ggml_tensor  * sx,
5433
0
        struct ggml_tensor  * c) {
5434
0
    GGML_ASSERT(ggml_is_3d(sx));
5435
0
    GGML_ASSERT(ggml_is_matrix(c));
5436
5437
0
    const int64_t d_conv  = c->ne[0];
5438
0
    const int64_t d_inner = c->ne[1];
5439
0
    const int64_t n_t     = sx->ne[0] - d_conv + 1; // tokens per sequence
5440
0
    const int64_t n_s     = sx->ne[2];
5441
5442
    // TODO: maybe support other strides than 1?
5443
0
    GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t);
5444
0
    GGML_ASSERT(sx->ne[1] == d_inner);
5445
0
    GGML_ASSERT(n_t >= 0);
5446
5447
0
    struct ggml_tensor * result = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_t, n_s);
5448
5449
0
    result->op     = GGML_OP_SSM_CONV;
5450
0
    result->src[0] = sx;
5451
0
    result->src[1] = c;
5452
5453
0
    return result;
5454
0
}
5455
5456
// ggml_ssm_scan
5457
5458
struct ggml_tensor * ggml_ssm_scan(
5459
        struct ggml_context * ctx,
5460
        struct ggml_tensor  * s,
5461
        struct ggml_tensor  * x,
5462
        struct ggml_tensor  * dt,
5463
        struct ggml_tensor  * A,
5464
        struct ggml_tensor  * B,
5465
        struct ggml_tensor  * C,
5466
0
        struct ggml_tensor  * ids) {
5467
0
    GGML_ASSERT(ggml_is_contiguous(s));
5468
0
    GGML_ASSERT(ggml_is_contiguous(dt));
5469
0
    GGML_ASSERT(ggml_is_contiguous(A));
5470
0
    GGML_ASSERT(x->nb[0] == ggml_type_size(x->type));
5471
0
    GGML_ASSERT(B->nb[0] == ggml_type_size(B->type));
5472
0
    GGML_ASSERT(C->nb[0] == ggml_type_size(C->type));
5473
0
    GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]);
5474
0
    GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]);
5475
0
    GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]);
5476
0
    GGML_ASSERT(ggml_are_same_shape(B, C));
5477
0
    GGML_ASSERT(ids->type == GGML_TYPE_I32);
5478
5479
0
    {
5480
0
        const int64_t d_state      = s->ne[0];
5481
0
        const int64_t head_dim     = x->ne[0];
5482
0
        const int64_t n_head       = x->ne[1];
5483
0
        const int64_t n_seq_tokens = x->ne[2];
5484
0
        const int64_t n_seqs       = x->ne[3];
5485
5486
0
        GGML_ASSERT(dt->ne[0] == n_head);
5487
0
        GGML_ASSERT(dt->ne[1] == n_seq_tokens);
5488
0
        GGML_ASSERT(dt->ne[2] == n_seqs);
5489
0
        GGML_ASSERT(ggml_is_3d(dt));
5490
0
        GGML_ASSERT(s->ne[1] == head_dim);
5491
0
        GGML_ASSERT(s->ne[2] == n_head);
5492
0
        GGML_ASSERT(B->ne[0] == d_state);
5493
0
        GGML_ASSERT(B->ne[2] == n_seq_tokens);
5494
0
        GGML_ASSERT(B->ne[3] == n_seqs);
5495
0
        GGML_ASSERT(ids->ne[0] == n_seqs);
5496
0
        GGML_ASSERT(ggml_is_vector(ids));
5497
0
        GGML_ASSERT(A->ne[1] == n_head);
5498
0
        GGML_ASSERT(ggml_is_matrix(A));
5499
5500
0
        if (A->ne[0] != 1) {
5501
            // Mamba-1 has more granular decay factors
5502
0
            GGML_ASSERT(A->ne[0] == d_state);
5503
0
        }
5504
0
    }
5505
5506
    // concatenated y + ssm_states
5507
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]);
5508
5509
0
    result->op   = GGML_OP_SSM_SCAN;
5510
0
    result->src[0] = s;
5511
0
    result->src[1] = x;
5512
0
    result->src[2] = dt;
5513
0
    result->src[3] = A;
5514
0
    result->src[4] = B;
5515
0
    result->src[5] = C;
5516
0
    result->src[6] = ids;
5517
5518
0
    return result;
5519
0
}
5520
5521
// ggml_win_part
5522
5523
struct ggml_tensor * ggml_win_part(
5524
        struct ggml_context * ctx,
5525
        struct ggml_tensor  * a,
5526
0
        int                   w) {
5527
0
    GGML_ASSERT(a->ne[3] == 1);
5528
0
    GGML_ASSERT(a->type  == GGML_TYPE_F32);
5529
5530
    // padding
5531
0
    const int px = (w - a->ne[1]%w)%w;
5532
0
    const int py = (w - a->ne[2]%w)%w;
5533
5534
0
    const int npx = (px + a->ne[1])/w;
5535
0
    const int npy = (py + a->ne[2])/w;
5536
0
    const int np  = npx*npy;
5537
5538
0
    const int64_t ne[4] = { a->ne[0], w, w, np, };
5539
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5540
5541
0
    int32_t params[] = { npx, npy, w };
5542
0
    ggml_set_op_params(result, params, sizeof(params));
5543
5544
0
    result->op     = GGML_OP_WIN_PART;
5545
0
    result->src[0] = a;
5546
5547
0
    return result;
5548
0
}
5549
5550
// ggml_win_unpart
5551
5552
struct ggml_tensor * ggml_win_unpart(
5553
        struct ggml_context * ctx,
5554
        struct ggml_tensor  * a,
5555
        int                   w0,
5556
        int                   h0,
5557
0
        int                   w) {
5558
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
5559
5560
0
    const int64_t ne[4] = { a->ne[0], w0, h0, 1, };
5561
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
5562
5563
0
    int32_t params[] = { w };
5564
0
    ggml_set_op_params(result, params, sizeof(params));
5565
5566
0
    result->op     = GGML_OP_WIN_UNPART;
5567
0
    result->src[0] = a;
5568
5569
0
    return result;
5570
0
}
5571
5572
// ggml_get_rel_pos
5573
5574
struct ggml_tensor * ggml_get_rel_pos(
5575
        struct ggml_context * ctx,
5576
        struct ggml_tensor  * a,
5577
        int                   qh,
5578
0
        int                   kh) {
5579
0
    GGML_ASSERT(qh == kh);
5580
0
    GGML_ASSERT(2*MAX(qh, kh) - 1 == a->ne[1]);
5581
5582
0
    const int64_t ne[4] = { a->ne[0], kh, qh, 1, };
5583
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F16, 3, ne);
5584
5585
0
    result->op     = GGML_OP_GET_REL_POS;
5586
0
    result->src[0] = a;
5587
5588
0
    return result;
5589
0
}
5590
5591
// ggml_add_rel_pos
5592
5593
static struct ggml_tensor * ggml_add_rel_pos_impl(
5594
        struct ggml_context * ctx,
5595
        struct ggml_tensor  * a,
5596
        struct ggml_tensor  * pw,
5597
        struct ggml_tensor  * ph,
5598
0
        bool                  inplace) {
5599
0
    GGML_ASSERT(ggml_are_same_shape(pw, ph));
5600
0
    GGML_ASSERT(ggml_is_contiguous(a));
5601
0
    GGML_ASSERT(ggml_is_contiguous(pw));
5602
0
    GGML_ASSERT(ggml_is_contiguous(ph));
5603
0
    GGML_ASSERT(ph->type == GGML_TYPE_F32);
5604
0
    GGML_ASSERT(pw->type == GGML_TYPE_F32);
5605
0
    GGML_ASSERT(pw->ne[3] == a->ne[2]);
5606
0
    GGML_ASSERT(pw->ne[0]*pw->ne[0] == a->ne[0]);
5607
0
    GGML_ASSERT(pw->ne[1]*pw->ne[2] == a->ne[1]);
5608
5609
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5610
0
    ggml_set_op_params_i32(result, 0, inplace ? 1 : 0);
5611
5612
0
    result->op     = GGML_OP_ADD_REL_POS;
5613
0
    result->src[0] = a;
5614
0
    result->src[1] = pw;
5615
0
    result->src[2] = ph;
5616
5617
0
    return result;
5618
0
}
5619
5620
struct ggml_tensor * ggml_add_rel_pos(
5621
        struct ggml_context * ctx,
5622
        struct ggml_tensor  * a,
5623
        struct ggml_tensor  * pw,
5624
0
        struct ggml_tensor  * ph) {
5625
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, false);
5626
0
}
5627
5628
struct ggml_tensor * ggml_add_rel_pos_inplace(
5629
        struct ggml_context * ctx,
5630
        struct ggml_tensor  * a,
5631
        struct ggml_tensor  * pw,
5632
0
        struct ggml_tensor  * ph) {
5633
0
    return ggml_add_rel_pos_impl(ctx, a, pw, ph, true);
5634
0
}
5635
5636
// ggml_rwkv_wkv6
5637
5638
struct ggml_tensor * ggml_rwkv_wkv6(
5639
        struct ggml_context * ctx,
5640
        struct ggml_tensor  * k,
5641
        struct ggml_tensor  * v,
5642
        struct ggml_tensor  * r,
5643
        struct ggml_tensor  * tf,
5644
        struct ggml_tensor  * td,
5645
0
        struct ggml_tensor  * state) {
5646
0
    GGML_ASSERT(ggml_is_contiguous(k));
5647
0
    GGML_ASSERT(ggml_is_contiguous(v));
5648
0
    GGML_ASSERT(ggml_is_contiguous(r));
5649
0
    GGML_ASSERT(ggml_is_contiguous(tf));
5650
0
    GGML_ASSERT(ggml_is_contiguous(td));
5651
0
    GGML_ASSERT(ggml_is_contiguous(state));
5652
5653
0
    const int64_t S = k->ne[0];
5654
0
    const int64_t H = k->ne[1];
5655
0
    const int64_t n_tokens = k->ne[2];
5656
0
    const int64_t n_seqs = state->ne[1];
5657
0
    {
5658
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5659
0
        GGML_ASSERT(r->ne[0] == S && r->ne[1] == H && r->ne[2] == n_tokens);
5660
0
        GGML_ASSERT(td->ne[0] == S && td->ne[1] == H && td->ne[2] == n_tokens);
5661
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5662
0
    }
5663
5664
    // concat output and new_state
5665
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5666
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5667
5668
0
    result->op     = GGML_OP_RWKV_WKV6;
5669
0
    result->src[0] = k;
5670
0
    result->src[1] = v;
5671
0
    result->src[2] = r;
5672
0
    result->src[3] = tf;
5673
0
    result->src[4] = td;
5674
0
    result->src[5] = state;
5675
5676
0
    return result;
5677
0
}
5678
5679
// ggml_gated_linear_attn
5680
5681
struct ggml_tensor * ggml_gated_linear_attn(
5682
        struct ggml_context * ctx,
5683
        struct ggml_tensor  * k,
5684
        struct ggml_tensor  * v,
5685
        struct ggml_tensor  * q,
5686
        struct ggml_tensor  * g,
5687
        struct ggml_tensor  * state,
5688
0
        float scale) {
5689
0
    GGML_ASSERT(ggml_is_contiguous(k));
5690
0
    GGML_ASSERT(ggml_is_contiguous(v));
5691
0
    GGML_ASSERT(ggml_is_contiguous(q));
5692
0
    GGML_ASSERT(ggml_is_contiguous(g));
5693
0
    GGML_ASSERT(ggml_is_contiguous(state));
5694
5695
0
    const int64_t S = k->ne[0];
5696
0
    const int64_t H = k->ne[1];
5697
0
    const int64_t n_tokens = k->ne[2];
5698
0
    const int64_t n_seqs = state->ne[1];
5699
0
    {
5700
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5701
0
        GGML_ASSERT(q->ne[0] == S && q->ne[1] == H && q->ne[2] == n_tokens);
5702
0
        GGML_ASSERT(g->ne[0] == S && g->ne[1] == H && g->ne[2] == n_tokens);
5703
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5704
0
    }
5705
5706
    // concat output and new_state
5707
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5708
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5709
5710
0
    ggml_set_op_params_f32(result, 0, scale);
5711
5712
0
    result->op     = GGML_OP_GATED_LINEAR_ATTN;
5713
0
    result->src[0] = k;
5714
0
    result->src[1] = v;
5715
0
    result->src[2] = q;
5716
0
    result->src[3] = g;
5717
0
    result->src[4] = state;
5718
5719
0
    return result;
5720
0
}
5721
5722
// ggml_rwkv_wkv7
5723
5724
struct ggml_tensor * ggml_rwkv_wkv7(
5725
        struct ggml_context * ctx,
5726
        struct ggml_tensor  * r,
5727
        struct ggml_tensor  * w,
5728
        struct ggml_tensor  * k,
5729
        struct ggml_tensor  * v,
5730
        struct ggml_tensor  * a,
5731
        struct ggml_tensor  * b,
5732
0
        struct ggml_tensor  * state) {
5733
0
    GGML_ASSERT(ggml_is_contiguous(r));
5734
0
    GGML_ASSERT(ggml_is_contiguous(w));
5735
0
    GGML_ASSERT(ggml_is_contiguous(k));
5736
0
    GGML_ASSERT(ggml_is_contiguous(v));
5737
0
    GGML_ASSERT(ggml_is_contiguous(a));
5738
0
    GGML_ASSERT(ggml_is_contiguous(b));
5739
0
    GGML_ASSERT(ggml_is_contiguous(state));
5740
5741
0
    const int64_t S = k->ne[0];
5742
0
    const int64_t H = k->ne[1];
5743
0
    const int64_t n_tokens = k->ne[2];
5744
0
    const int64_t n_seqs = state->ne[1];
5745
0
    {
5746
0
        GGML_ASSERT(w->ne[0] == S && w->ne[1] == H && w->ne[2] == n_tokens);
5747
0
        GGML_ASSERT(k->ne[0] == S && k->ne[1] == H && k->ne[2] == n_tokens);
5748
0
        GGML_ASSERT(v->ne[0] == S && v->ne[1] == H && v->ne[2] == n_tokens);
5749
0
        GGML_ASSERT(a->ne[0] == S && a->ne[1] == H && a->ne[2] == n_tokens);
5750
0
        GGML_ASSERT(b->ne[0] == S && b->ne[1] == H && b->ne[2] == n_tokens);
5751
0
        GGML_ASSERT(ggml_nelements(state) == S * S * H * n_seqs);
5752
0
    }
5753
5754
    // concat output and new_state
5755
0
    const int64_t ne[4] = { S * H, n_tokens + S * n_seqs, 1, 1 };
5756
0
    struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
5757
5758
0
    result->op     = GGML_OP_RWKV_WKV7;
5759
0
    result->src[0] = r;
5760
0
    result->src[1] = w;
5761
0
    result->src[2] = k;
5762
0
    result->src[3] = v;
5763
0
    result->src[4] = a;
5764
0
    result->src[5] = b;
5765
0
    result->src[6] = state;
5766
5767
0
    return result;
5768
0
}
5769
5770
// ggml_unary
5771
5772
static struct ggml_tensor * ggml_unary_impl(
5773
        struct ggml_context * ctx,
5774
        struct ggml_tensor  * a,
5775
        enum ggml_unary_op    op,
5776
0
        bool                  inplace) {
5777
0
    GGML_ASSERT(ggml_is_contiguous_rows(a));
5778
5779
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5780
5781
0
    ggml_set_op_params_i32(result, 0, (int32_t) op);
5782
5783
0
    result->op     = GGML_OP_UNARY;
5784
0
    result->src[0] = a;
5785
5786
0
    return result;
5787
0
}
5788
5789
struct ggml_tensor * ggml_unary(
5790
        struct ggml_context * ctx,
5791
        struct ggml_tensor  * a,
5792
0
        enum ggml_unary_op    op) {
5793
0
    return ggml_unary_impl(ctx, a, op, false);
5794
0
}
5795
5796
struct ggml_tensor * ggml_unary_inplace(
5797
        struct ggml_context * ctx,
5798
        struct ggml_tensor  * a,
5799
0
        enum ggml_unary_op    op) {
5800
0
    return ggml_unary_impl(ctx, a, op, true);
5801
0
}
5802
5803
// ggml_map_custom1
5804
5805
static struct ggml_tensor * ggml_map_custom1_impl(
5806
        struct ggml_context      * ctx,
5807
        struct ggml_tensor       * a,
5808
        const  ggml_custom1_op_t   fun,
5809
        int                        n_tasks,
5810
        void                     * userdata,
5811
0
        bool                       inplace) {
5812
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5813
5814
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5815
5816
0
    struct ggml_map_custom1_op_params params = {
5817
0
        /*.fun      =*/ fun,
5818
0
        /*.n_tasks  =*/ n_tasks,
5819
0
        /*.userdata =*/ userdata
5820
0
    };
5821
0
    ggml_set_op_params(result, &params, sizeof(params));
5822
5823
0
    result->op     = GGML_OP_MAP_CUSTOM1;
5824
0
    result->src[0] = a;
5825
5826
0
    return result;
5827
0
}
5828
5829
struct ggml_tensor * ggml_map_custom1(
5830
        struct ggml_context      * ctx,
5831
        struct ggml_tensor       * a,
5832
        const  ggml_custom1_op_t   fun,
5833
        int                        n_tasks,
5834
0
        void                     * userdata) {
5835
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
5836
0
}
5837
5838
struct ggml_tensor * ggml_map_custom1_inplace(
5839
        struct ggml_context      * ctx,
5840
        struct ggml_tensor       * a,
5841
        const  ggml_custom1_op_t   fun,
5842
        int                        n_tasks,
5843
0
        void                     * userdata) {
5844
0
    return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
5845
0
}
5846
5847
// ggml_map_custom2
5848
5849
static struct ggml_tensor * ggml_map_custom2_impl(
5850
        struct ggml_context      * ctx,
5851
        struct ggml_tensor       * a,
5852
        struct ggml_tensor       * b,
5853
        const  ggml_custom2_op_t   fun,
5854
        int                        n_tasks,
5855
        void                     * userdata,
5856
0
        bool                       inplace) {
5857
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5858
5859
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5860
5861
0
    struct ggml_map_custom2_op_params params = {
5862
0
        /*.fun      =*/ fun,
5863
0
        /*.n_tasks  =*/ n_tasks,
5864
0
        /*.userdata =*/ userdata
5865
0
    };
5866
0
    ggml_set_op_params(result, &params, sizeof(params));
5867
5868
0
    result->op     = GGML_OP_MAP_CUSTOM2;
5869
0
    result->src[0] = a;
5870
0
    result->src[1] = b;
5871
5872
0
    return result;
5873
0
}
5874
5875
struct ggml_tensor * ggml_map_custom2(
5876
        struct ggml_context      * ctx,
5877
        struct ggml_tensor       * a,
5878
        struct ggml_tensor       * b,
5879
        const  ggml_custom2_op_t   fun,
5880
        int                        n_tasks,
5881
0
        void                     * userdata) {
5882
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
5883
0
}
5884
5885
struct ggml_tensor * ggml_map_custom2_inplace(
5886
        struct ggml_context      * ctx,
5887
        struct ggml_tensor       * a,
5888
        struct ggml_tensor       * b,
5889
        const  ggml_custom2_op_t   fun,
5890
        int                        n_tasks,
5891
0
        void                     * userdata) {
5892
0
    return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
5893
0
}
5894
5895
// ggml_map_custom3
5896
5897
static struct ggml_tensor * ggml_map_custom3_impl(
5898
        struct ggml_context      * ctx,
5899
        struct ggml_tensor       * a,
5900
        struct ggml_tensor       * b,
5901
        struct ggml_tensor       * c,
5902
        const  ggml_custom3_op_t   fun,
5903
        int                        n_tasks,
5904
        void                     * userdata,
5905
0
        bool                       inplace) {
5906
0
    GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
5907
5908
0
    struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
5909
5910
0
    struct ggml_map_custom3_op_params params = {
5911
0
        /*.fun      =*/ fun,
5912
0
        /*.n_tasks  =*/ n_tasks,
5913
0
        /*.userdata =*/ userdata
5914
0
    };
5915
0
    ggml_set_op_params(result, &params, sizeof(params));
5916
5917
0
    result->op     = GGML_OP_MAP_CUSTOM3;
5918
0
    result->src[0] = a;
5919
0
    result->src[1] = b;
5920
0
    result->src[2] = c;
5921
5922
0
    return result;
5923
0
}
5924
5925
struct ggml_tensor * ggml_map_custom3(
5926
        struct ggml_context      * ctx,
5927
        struct ggml_tensor       * a,
5928
        struct ggml_tensor       * b,
5929
        struct ggml_tensor       * c,
5930
        const  ggml_custom3_op_t   fun,
5931
        int                        n_tasks,
5932
0
        void                     * userdata) {
5933
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
5934
0
}
5935
5936
struct ggml_tensor * ggml_map_custom3_inplace(
5937
        struct ggml_context      * ctx,
5938
        struct ggml_tensor       * a,
5939
        struct ggml_tensor       * b,
5940
        struct ggml_tensor       * c,
5941
        const  ggml_custom3_op_t   fun,
5942
        int                        n_tasks,
5943
0
        void                     * userdata) {
5944
0
    return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
5945
0
}
5946
5947
struct ggml_tensor * ggml_custom_4d(
5948
        struct ggml_context * ctx,
5949
        enum ggml_type        type,
5950
        int64_t               ne0,
5951
        int64_t               ne1,
5952
        int64_t               ne2,
5953
        int64_t               ne3,
5954
        struct ggml_tensor ** args,
5955
        int                   n_args,
5956
        ggml_custom_op_t      fun,
5957
        int                   n_tasks,
5958
0
        void                * userdata) {
5959
5960
0
    GGML_ASSERT(n_args < GGML_MAX_SRC);
5961
5962
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, type, ne0, ne1, ne2, ne3);
5963
5964
0
    struct ggml_custom_op_params params = {
5965
0
        /*.fun      =*/ fun,
5966
0
        /*.n_tasks  =*/ n_tasks,
5967
0
        /*.userdata =*/ userdata
5968
0
    };
5969
0
    ggml_set_op_params(result, &params, sizeof(params));
5970
5971
0
    result->op = GGML_OP_CUSTOM;
5972
0
    for (int i = 0; i < n_args; i++) {
5973
0
        result->src[i] = args[i];
5974
0
    }
5975
5976
0
    return result;
5977
0
}
5978
5979
struct ggml_tensor * ggml_custom_inplace(
5980
        struct ggml_context * ctx,
5981
        struct ggml_tensor  * a,
5982
        struct ggml_tensor ** args,
5983
        int                   n_args,
5984
        ggml_custom_op_t      fun,
5985
        int                   n_tasks,
5986
0
        void                * userdata) {
5987
5988
0
    GGML_ASSERT(n_args < GGML_MAX_SRC - 1);
5989
5990
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
5991
5992
0
    struct ggml_custom_op_params params = {
5993
0
        /*.fun      =*/ fun,
5994
0
        /*.n_tasks  =*/ n_tasks,
5995
0
        /*.userdata =*/ userdata
5996
0
    };
5997
0
    ggml_set_op_params(result, &params, sizeof(params));
5998
5999
0
    result->op = GGML_OP_CUSTOM;
6000
0
    result->src[0] = a;
6001
0
    for (int i = 0; i < n_args; i++) {
6002
0
        result->src[i + 1] = args[i];
6003
0
    }
6004
6005
0
    return result;
6006
0
}
6007
// ggml_cross_entropy_loss
6008
6009
struct ggml_tensor * ggml_cross_entropy_loss(
6010
        struct ggml_context * ctx,
6011
        struct ggml_tensor  * a,
6012
0
        struct ggml_tensor  * b) {
6013
0
    GGML_ASSERT(ggml_are_same_shape(a, b));
6014
6015
0
    struct ggml_tensor * result = ggml_new_tensor_1d(ctx, a->type, 1);
6016
6017
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS;
6018
0
    result->src[0] = a;
6019
0
    result->src[1] = b;
6020
6021
0
    return result;
6022
0
}
6023
6024
// ggml_cross_entropy_loss_back
6025
6026
struct ggml_tensor * ggml_cross_entropy_loss_back(
6027
        struct ggml_context * ctx,
6028
        struct ggml_tensor  * a,
6029
        struct ggml_tensor  * b,
6030
0
        struct ggml_tensor  * c) {
6031
0
    GGML_ASSERT(ggml_is_scalar(a));
6032
0
    GGML_ASSERT(ggml_are_same_shape(b, c));
6033
6034
0
    struct ggml_tensor * result = ggml_dup_tensor(ctx, b);
6035
6036
0
    result->op     = GGML_OP_CROSS_ENTROPY_LOSS_BACK;
6037
0
    result->src[0] = a;
6038
0
    result->src[1] = b;
6039
0
    result->src[2] = c;
6040
6041
0
    return result;
6042
0
}
6043
6044
// opt_step_adamw
6045
6046
struct ggml_tensor * ggml_opt_step_adamw(
6047
        struct ggml_context * ctx,
6048
        struct ggml_tensor  * a,
6049
        struct ggml_tensor  * grad,
6050
        struct ggml_tensor  * m,
6051
        struct ggml_tensor  * v,
6052
0
        struct ggml_tensor  * adamw_params) {
6053
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6054
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6055
0
    GGML_ASSERT(ggml_are_same_shape(a, m));
6056
0
    GGML_ASSERT(ggml_are_same_shape(a, v));
6057
0
    GGML_ASSERT(adamw_params->type == GGML_TYPE_F32);
6058
0
    GGML_ASSERT(ggml_nelements(adamw_params) == 7);
6059
6060
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6061
6062
0
    result->op     = GGML_OP_OPT_STEP_ADAMW;
6063
0
    result->src[0] = a;
6064
0
    result->src[1] = grad;
6065
0
    result->src[2] = m;
6066
0
    result->src[3] = v;
6067
0
    result->src[4] = adamw_params;
6068
6069
0
    return result;
6070
0
}
6071
6072
// opt_step_sgd
6073
6074
struct ggml_tensor * ggml_opt_step_sgd(
6075
        struct ggml_context * ctx,
6076
        struct ggml_tensor  * a,
6077
        struct ggml_tensor  * grad,
6078
0
        struct ggml_tensor  * params) {
6079
0
    GGML_ASSERT(a->flags & GGML_TENSOR_FLAG_PARAM);
6080
0
    GGML_ASSERT(ggml_are_same_shape(a, grad));
6081
0
    GGML_ASSERT(params->type == GGML_TYPE_F32);
6082
0
    GGML_ASSERT(ggml_nelements(params) == 2);
6083
6084
0
    struct ggml_tensor * result = ggml_view_tensor(ctx, a);
6085
6086
0
    result->op     = GGML_OP_OPT_STEP_SGD;
6087
0
    result->src[0] = a;
6088
0
    result->src[1] = grad;
6089
0
    result->src[2] = params;
6090
6091
0
    return result;
6092
0
}
6093
6094
// solve_tri
6095
6096
struct ggml_tensor * ggml_solve_tri(
6097
        struct ggml_context * ctx,
6098
        struct ggml_tensor  * a,
6099
        struct ggml_tensor  * b,
6100
        bool                  left,
6101
        bool                  lower,
6102
0
        bool                  uni) {
6103
0
    GGML_ASSERT(a->type == GGML_TYPE_F32);
6104
0
    GGML_ASSERT(b->type == GGML_TYPE_F32);
6105
6106
    // A must be square and lower diagonal
6107
0
    GGML_ASSERT(a->ne[0] == a->ne[1]);
6108
    // B must have same outer dimension as A
6109
0
    GGML_ASSERT(a->ne[1] == b->ne[1]);
6110
6111
    // batch dimensions must be equal
6112
0
    GGML_ASSERT(a->ne[2] == b->ne[2]);
6113
0
    GGML_ASSERT(a->ne[3] == b->ne[3]);
6114
6115
0
    GGML_ASSERT(ggml_is_contiguous(a));
6116
0
    GGML_ASSERT(ggml_is_contiguous(b));
6117
6118
0
    GGML_ASSERT(lower && left && !uni); // TODO: support other variants
6119
6120
0
    struct ggml_tensor * result = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, b->ne[0], b->ne[1], b->ne[2], b->ne[3]);
6121
6122
0
    result->op     = GGML_OP_SOLVE_TRI;
6123
0
    result->src[0] = a;
6124
0
    result->src[1] = b;
6125
6126
0
    return result;
6127
0
}
6128
6129
////////////////////////////////////////////////////////////////////////////////
6130
6131
0
struct ggml_hash_set ggml_hash_set_new(size_t size) {
6132
0
    size = ggml_hash_size(size);
6133
0
    struct ggml_hash_set result;
6134
0
    result.size = size;
6135
0
    result.keys = GGML_MALLOC(sizeof(struct ggml_tensor *) * size);
6136
0
    result.used = GGML_CALLOC(ggml_bitset_size(size), sizeof(ggml_bitset_t));
6137
0
    return result;
6138
0
}
6139
6140
0
void ggml_hash_set_reset(struct ggml_hash_set * hash_set) {
6141
0
    memset(hash_set->used, 0, sizeof(ggml_bitset_t) * ggml_bitset_size(hash_set->size));
6142
0
}
6143
6144
0
void ggml_hash_set_free(struct ggml_hash_set * hash_set) {
6145
0
    GGML_FREE(hash_set->used);
6146
0
    GGML_FREE(hash_set->keys);
6147
0
}
6148
6149
0
size_t ggml_hash_size(size_t min_sz) {
6150
    // next primes after powers of two
6151
0
    static const size_t primes[] = {
6152
0
        2, 3, 5, 11, 17, 37, 67, 131, 257, 521, 1031,
6153
0
        2053, 4099, 8209, 16411, 32771, 65537, 131101,
6154
0
        262147, 524309, 1048583, 2097169, 4194319, 8388617,
6155
0
        16777259, 33554467, 67108879, 134217757, 268435459,
6156
0
        536870923, 1073741827, 2147483659
6157
0
    };
6158
0
    static const size_t n_primes = sizeof(primes)/sizeof(primes[0]);
6159
6160
    // find the smallest prime that is larger or equal than min_sz
6161
0
    size_t l = 0;
6162
0
    size_t r = n_primes;
6163
0
    while (l < r) {
6164
0
        size_t m = (l + r)/2;
6165
0
        if (primes[m] < min_sz) {
6166
0
            l = m + 1;
6167
0
        } else {
6168
0
            r = m;
6169
0
        }
6170
0
    }
6171
0
    size_t sz = l < n_primes ? primes[l] : min_sz | 1;
6172
0
    return sz;
6173
0
}
6174
6175
struct hash_map {
6176
    struct ggml_hash_set set;
6177
    struct ggml_tensor ** vals;
6178
};
6179
6180
0
static struct hash_map * ggml_new_hash_map(size_t size) {
6181
0
    struct hash_map * result = GGML_MALLOC(sizeof(struct hash_map));
6182
0
    result->set = ggml_hash_set_new(size);
6183
0
    result->vals = GGML_CALLOC(result->set.size, sizeof(struct ggml_tensor *));
6184
0
    return result;
6185
0
}
6186
6187
0
static void ggml_hash_map_free(struct hash_map * map) {
6188
0
    ggml_hash_set_free(&map->set);
6189
0
    GGML_FREE(map->vals);
6190
0
    GGML_FREE(map);
6191
0
}
6192
6193
// utility functions to change gradients
6194
// isrc is the index of tensor in cgraph->visited_has_set.keys
6195
// the corresponding gradient (accumulators) are also at position isrc
6196
// if tensor has a gradient accumulator, modify that accumulator in-place
6197
// else if there is no gradient for tensor, set the corresponding value
6198
// else, just add/subtract/etc. the gradients
6199
6200
static void ggml_add_or_set(
6201
        struct ggml_context * ctx,
6202
        struct ggml_cgraph  * cgraph,
6203
        size_t                isrc,
6204
0
        struct ggml_tensor  * tensor) {
6205
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6206
0
    GGML_ASSERT(src);
6207
0
    if (cgraph->grads[isrc]) {
6208
0
        cgraph->grads[isrc] = ggml_add_impl(ctx, cgraph->grads[isrc], tensor, /*inplace =*/ cgraph->grad_accs[isrc]);
6209
0
    } else {
6210
0
        cgraph->grads[isrc] = tensor;
6211
0
    }
6212
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6213
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6214
0
}
6215
6216
static void ggml_acc_or_set(
6217
        struct ggml_context * ctx,
6218
        struct ggml_cgraph  * cgraph,
6219
        size_t                isrc,
6220
        struct ggml_tensor  * tensor,
6221
        const  size_t         nb1,
6222
        const  size_t         nb2,
6223
        const  size_t         nb3,
6224
0
        const  size_t         offset) {
6225
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6226
0
    GGML_ASSERT(src);
6227
0
    if (cgraph->grads[isrc]) {
6228
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, cgraph->grads[isrc], tensor, nb1, nb2, nb3, offset, cgraph->grad_accs[isrc]);
6229
0
    } else {
6230
0
        struct ggml_tensor * a_zero = ggml_scale(ctx, src, 0.0f); // FIXME this is going to produce NaN if a contains inf/NaN
6231
0
        cgraph->grads[isrc] = ggml_acc_impl(ctx, a_zero, tensor, nb1, nb2, nb3, offset, false);
6232
0
    }
6233
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", cgraph->visited_hash_set.keys[isrc]->name);
6234
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6235
0
}
6236
6237
static void ggml_add1_or_set(
6238
        struct ggml_context * ctx,
6239
        struct ggml_cgraph  * cgraph,
6240
        size_t                isrc,
6241
0
        struct ggml_tensor  * tensor) {
6242
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6243
0
    GGML_ASSERT(src);
6244
0
    if (cgraph->grads[isrc]) {
6245
0
        cgraph->grads[isrc] = ggml_add1_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6246
0
    } else {
6247
0
        cgraph->grads[isrc] = ggml_repeat(ctx, tensor, src);
6248
0
    }
6249
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6250
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6251
0
}
6252
6253
static void ggml_sub_or_set(
6254
        struct ggml_context * ctx,
6255
        struct ggml_cgraph  * cgraph,
6256
        size_t                isrc,
6257
0
        struct ggml_tensor  * tensor) {
6258
0
    struct ggml_tensor * src = cgraph->visited_hash_set.keys[isrc];
6259
0
    GGML_ASSERT(src);
6260
0
    if (cgraph->grads[isrc]) {
6261
0
        cgraph->grads[isrc] = ggml_sub_impl(ctx, cgraph->grads[isrc], tensor, cgraph->grad_accs[isrc]);
6262
0
    } else {
6263
0
        cgraph->grads[isrc] = ggml_neg(ctx, tensor);
6264
0
    }
6265
0
    ggml_format_name(cgraph->grads[isrc], "grad for %s", src->name);
6266
0
    ggml_build_forward_expand(cgraph, cgraph->grads[isrc]);
6267
0
}
6268
6269
static void ggml_compute_backward(
6270
0
        struct ggml_context * ctx, struct ggml_cgraph * cgraph, int i, const bool * grads_needed) {
6271
0
    struct ggml_tensor * tensor = cgraph->nodes[i];
6272
0
    struct ggml_tensor * grad   = ggml_graph_get_grad(cgraph, tensor);
6273
6274
0
    if (!grad) {
6275
0
        return;
6276
0
    }
6277
6278
0
    struct ggml_tensor * src0 = tensor->src[0];
6279
0
    struct ggml_tensor * src1 = tensor->src[1];
6280
0
    struct ggml_tensor * src2 = tensor->src[2];
6281
0
    struct ggml_hash_set * hash_set = &cgraph->visited_hash_set;
6282
0
    const size_t isrc0 = src0 ? ggml_hash_find(hash_set, src0) : (size_t) -1;
6283
0
    const size_t isrc1 = src1 ? ggml_hash_find(hash_set, src1) : (size_t) -1;
6284
0
    const size_t isrc2 = src2 ? ggml_hash_find(hash_set, src2) : (size_t) -1;
6285
0
    const bool src0_needs_grads = src0 && isrc0 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc0) && grads_needed[isrc0];
6286
0
    const bool src1_needs_grads = src1 && isrc1 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc1) && grads_needed[isrc1];
6287
0
    const bool src2_needs_grads = src2 && isrc2 != GGML_HASHSET_FULL && ggml_bitset_get(hash_set->used, isrc2) && grads_needed[isrc2];
6288
6289
0
    switch (tensor->op) {
6290
0
        case GGML_OP_DUP: {
6291
0
            if (src0_needs_grads) {
6292
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6293
0
            }
6294
0
        } break;
6295
0
        case GGML_OP_ADD: {
6296
0
            if (src0_needs_grads) {
6297
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6298
0
            }
6299
0
            if (src1_needs_grads) {
6300
0
                struct ggml_tensor * tmp = grad;
6301
0
                if (!ggml_are_same_shape(src0, src1)) {
6302
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6303
0
                }
6304
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6305
0
            }
6306
0
        } break;
6307
0
        case GGML_OP_ADD1: {
6308
0
            if (src0_needs_grads) {
6309
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6310
0
            }
6311
0
            if (src1_needs_grads) {
6312
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_mean(ctx, grad)); // TODO: should probably be sum instead of mean
6313
0
            }
6314
0
        } break;
6315
0
        case GGML_OP_ACC: {
6316
0
            if (src0_needs_grads) {
6317
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6318
0
            }
6319
0
            if (src1_needs_grads) {
6320
0
                const size_t nb1    = ((int32_t *) tensor->op_params)[0];
6321
0
                const size_t nb2    = ((int32_t *) tensor->op_params)[1];
6322
0
                const size_t nb3    = ((int32_t *) tensor->op_params)[2];
6323
0
                const size_t offset = ((int32_t *) tensor->op_params)[3];
6324
6325
0
                struct ggml_tensor * tensor_grad_view = ggml_view_4d(ctx,
6326
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6327
0
                    nb1, nb2, nb3, offset);
6328
6329
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6330
0
            }
6331
0
        } break;
6332
0
        case GGML_OP_SUB: {
6333
0
            if (src0_needs_grads) {
6334
0
                ggml_add_or_set(ctx, cgraph, isrc0, grad);
6335
0
            }
6336
0
            if (src1_needs_grads) {
6337
0
                ggml_sub_or_set(ctx, cgraph, isrc1, grad);
6338
0
            }
6339
0
        } break;
6340
0
        case GGML_OP_MUL: {
6341
0
            if (src0_needs_grads) {
6342
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, src1));
6343
0
            }
6344
0
            if (src1_needs_grads) {
6345
0
                struct ggml_tensor * tmp = ggml_mul(ctx, src0, grad);
6346
0
                if (!ggml_are_same_shape(src0, src1)) {
6347
0
                    tmp = ggml_repeat_back(ctx, tmp, src1);
6348
0
                }
6349
0
                ggml_add_or_set(ctx, cgraph, isrc1, tmp);
6350
0
            }
6351
0
        } break;
6352
0
        case GGML_OP_DIV: {
6353
0
            if (src0_needs_grads) {
6354
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src1));
6355
0
            }
6356
0
            if (src1_needs_grads) {
6357
0
                ggml_sub_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, grad, ggml_div(ctx, tensor, src1)));
6358
0
            }
6359
0
        } break;
6360
0
        case GGML_OP_SQR: {
6361
0
            if (src0_needs_grads) {
6362
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_mul(ctx, src0, grad), 2.0f));
6363
0
            }
6364
0
        } break;
6365
0
        case GGML_OP_SQRT: {
6366
0
            if (src0_needs_grads) {
6367
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale(ctx, ggml_div(ctx, grad, tensor), 0.5f));
6368
0
            }
6369
0
        } break;
6370
0
        case GGML_OP_LOG: {
6371
0
            if (src0_needs_grads) {
6372
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_div(ctx, grad, src0));
6373
0
            }
6374
0
        } break;
6375
0
        case GGML_OP_SIN: {
6376
0
            if (src0_needs_grads) {
6377
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_cos(ctx, src0)));
6378
0
            }
6379
0
        } break;
6380
0
        case GGML_OP_COS: {
6381
0
            if (src0_needs_grads) {
6382
0
                ggml_sub_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sin(ctx, src0)));
6383
0
            }
6384
0
        } break;
6385
0
        case GGML_OP_SUM: {
6386
0
            if (src0_needs_grads) {
6387
0
                ggml_add1_or_set(ctx, cgraph, isrc0, grad);
6388
0
            }
6389
0
        } break;
6390
0
        case GGML_OP_SUM_ROWS: {
6391
0
            if (src0_needs_grads) {
6392
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6393
0
            }
6394
0
        } break;
6395
0
        case GGML_OP_MEAN: {
6396
0
            if (src0_needs_grads) {
6397
0
                ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false));
6398
0
            }
6399
0
        } break;
6400
0
        case GGML_OP_REPEAT: {
6401
0
            if (src0_needs_grads) {
6402
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat_back(ctx, grad, src0));
6403
0
            }
6404
0
        } break;
6405
0
        case GGML_OP_REPEAT_BACK: {
6406
0
            if (src0_needs_grads) {
6407
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_repeat(ctx, grad, src0));
6408
0
            }
6409
0
        } break;
6410
0
        case GGML_OP_RMS_NORM: {
6411
0
            if (src0_needs_grads) {
6412
0
                float eps;
6413
0
                memcpy(&eps, tensor->op_params, sizeof(float));
6414
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_rms_norm_back(ctx, grad, src0, eps));
6415
0
            }
6416
0
        } break;
6417
0
        case GGML_OP_MUL_MAT: {
6418
            // https://cs231n.github.io/optimization-2/#staged
6419
            // # forward pass
6420
            // s0 = np.random.randn(5, 10)
6421
            // s1 = np.random.randn(10, 3)
6422
            // t = s0.dot(s1)
6423
6424
            // # now suppose we had the gradient on t from above in the circuit
6425
            // dt = np.random.randn(*t.shape) # same shape as t
6426
            // ds0 = dt.dot(s1.T) #.T gives the transpose of the matrix
6427
            // ds1 = t.T.dot(dt)
6428
6429
            // tensor.shape [m,p,qq,rr]
6430
            // src0.shape   [n,m,q1,r1]
6431
            // src1.shape   [n,p,qq,rr]
6432
6433
0
            if (src0_needs_grads) {
6434
0
                GGML_ASSERT(grad->ne[2] == src1->ne[2]);
6435
0
                GGML_ASSERT(grad->ne[3] == src1->ne[3]);
6436
0
                struct ggml_tensor * tmp =
6437
0
                    ggml_out_prod(ctx, // [n,m,qq,rr]
6438
0
                        src1,          // [n,p,qq,rr]
6439
0
                        grad);         // [m,p,qq,rr]
6440
0
                if (!ggml_are_same_shape(tmp, src0)) {
6441
0
                    GGML_ASSERT(tmp->ne[0] == src0->ne[0]);
6442
0
                    GGML_ASSERT(tmp->ne[1] == src0->ne[1]);
6443
0
                    GGML_ASSERT(tmp->ne[3] == 1);
6444
6445
0
                    const int64_t nr2 = tmp->ne[2] / src0->ne[2];
6446
0
                    const size_t nb2 = tmp->nb[2] * nr2;
6447
0
                    const size_t nb3 = tmp->nb[2];
6448
6449
0
                    tmp = ggml_view_4d(ctx, tmp, src0->ne[0], src0->ne[1], src0->ne[2], nr2, tmp->nb[1], nb2, nb3, 0);
6450
0
                    tmp = ggml_repeat_back(ctx, tmp, src0);
6451
0
                }
6452
0
                ggml_add_or_set(ctx, cgraph, isrc0, tmp);
6453
0
            }
6454
0
            if (src1_needs_grads) {
6455
0
                ggml_add_or_set(ctx, cgraph, isrc1,
6456
                        // ggml_mul_mat(ctx,                   // [n,p,qq,rr]
6457
                        //     ggml_cont(ctx,                  // [m,n,q1,r1]
6458
                        //         ggml_transpose(ctx, src0)), // [m,n,q1,r1]
6459
                        //     grad),                          // [m,p,qq,rr]
6460
6461
                        // when src0 is bigger than tensor->grad (this is mostly the case in llama),
6462
                        // avoid transpose of src0, rather transpose smaller tensor->grad
6463
                        // and then use ggml_out_prod
6464
0
                        ggml_out_prod(ctx,      // [n,p,qq,rr]
6465
0
                            src0,               // [n,m,q1,r1]
6466
0
                            ggml_transpose(ctx, // [p,m,qq,rr]
6467
0
                                grad)));        // [m,p,qq,rr]
6468
0
            }
6469
0
        } break;
6470
0
        case GGML_OP_SCALE: {
6471
0
            if (src0_needs_grads) {
6472
0
                float s;
6473
0
                memcpy(&s, tensor->op_params, sizeof(float));
6474
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false));
6475
0
            }
6476
0
        } break;
6477
0
        case GGML_OP_SET: {
6478
0
            const size_t nb1    = ((const int32_t *) tensor->op_params)[0];
6479
0
            const size_t nb2    = ((const int32_t *) tensor->op_params)[1];
6480
0
            const size_t nb3    = ((const int32_t *) tensor->op_params)[2];
6481
0
            const size_t offset = ((const int32_t *) tensor->op_params)[3];
6482
6483
0
            struct ggml_tensor * tensor_grad_view = NULL;
6484
6485
0
            if (src0_needs_grads || src1_needs_grads) {
6486
0
                GGML_ASSERT(src0->type == tensor->type);
6487
0
                GGML_ASSERT(!cgraph->grads[isrc0] ||                      cgraph->grads[isrc0]->type == grad->type);
6488
0
                GGML_ASSERT(!cgraph->grads[isrc1] || !src1_needs_grads || cgraph->grads[isrc1]->type == grad->type);
6489
6490
0
                tensor_grad_view = ggml_view_4d(ctx,
6491
0
                    grad, src1->ne[0], src1->ne[1], src1->ne[2], src1->ne[3],
6492
0
                    nb1, nb2, nb3, offset);
6493
0
            }
6494
6495
0
            if (src0_needs_grads) {
6496
0
                struct ggml_tensor * tmp = ggml_neg(ctx, tensor_grad_view);
6497
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_acc_impl(ctx, grad, tmp, nb1, nb2, nb3, offset, false));
6498
0
            }
6499
6500
0
            if (src1_needs_grads) {
6501
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_reshape(ctx, ggml_cont(ctx, tensor_grad_view), src1));
6502
0
            }
6503
0
        } break;
6504
0
        case GGML_OP_CPY: {
6505
            // cpy overwrites value of src1 by src0 and returns view(src1)
6506
            // the overwriting is mathematically equivalent to:
6507
            // tensor = src0 * 1 + src1 * 0
6508
0
            if (src0_needs_grads) {
6509
                // dsrc0 = dtensor * 1
6510
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad, src0));
6511
0
            }
6512
0
            if (src1_needs_grads) {
6513
                // dsrc1 = dtensor * 0 -> noop
6514
0
            }
6515
0
        } break;
6516
0
        case GGML_OP_CONT: {
6517
            // same as cpy
6518
0
            if (src0_needs_grads) {
6519
0
                GGML_ASSERT(!cgraph->grads[isrc0] || ggml_is_contiguous(cgraph->grads[isrc0]));
6520
0
                GGML_ASSERT(ggml_is_contiguous(grad));
6521
0
                GGML_ASSERT(ggml_nelements(tensor) == ggml_nelements(src0));
6522
0
                ggml_add_or_set(ctx, cgraph, isrc0,
6523
0
                    ggml_are_same_shape(tensor, src0) ? grad : ggml_reshape(ctx, grad, src0));
6524
0
            }
6525
0
        } break;
6526
0
        case GGML_OP_RESHAPE: {
6527
0
            if (src0_needs_grads) {
6528
0
                struct ggml_tensor * grad_cont = ggml_is_contiguous(grad) ? grad : ggml_cont(ctx, grad);
6529
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_reshape(ctx, grad_cont, src0));
6530
0
            }
6531
0
        } break;
6532
0
        case GGML_OP_VIEW: {
6533
0
            if (src0_needs_grads) {
6534
0
                size_t offset;
6535
6536
0
                memcpy(&offset, tensor->op_params, sizeof(offset));
6537
6538
0
                size_t nb1 = tensor->nb[1];
6539
0
                size_t nb2 = tensor->nb[2];
6540
0
                size_t nb3 = tensor->nb[3];
6541
6542
0
                if (cgraph->grads[isrc0] && src0->type != cgraph->grads[isrc0]->type) {
6543
                    // gradient is typically F32, but src0 could be other type
6544
0
                    size_t ng = ggml_element_size(cgraph->grads[isrc0]);
6545
0
                    size_t n0 = ggml_element_size(src0);
6546
0
                    GGML_ASSERT(offset % n0 == 0);
6547
0
                    GGML_ASSERT(nb1 % n0 == 0);
6548
0
                    GGML_ASSERT(nb2 % n0 == 0);
6549
0
                    GGML_ASSERT(nb3 % n0 == 0);
6550
0
                    offset = (offset / n0) * ng;
6551
0
                    nb1 = (nb1 / n0) * ng;
6552
0
                    nb2 = (nb2 / n0) * ng;
6553
0
                    nb3 = (nb3 / n0) * ng;
6554
0
                }
6555
6556
0
                ggml_acc_or_set(ctx, cgraph, isrc0, grad, nb1, nb2, nb3, offset);
6557
0
            }
6558
0
        } break;
6559
0
        case GGML_OP_PERMUTE: {
6560
0
            if (src0_needs_grads) {
6561
0
                const int32_t * axes = (const int32_t *) tensor->op_params;
6562
0
                const int axis0 = axes[0] & 0x3;
6563
0
                const int axis1 = axes[1] & 0x3;
6564
0
                const int axis2 = axes[2] & 0x3;
6565
0
                const int axis3 = axes[3] & 0x3;
6566
0
                int axb[4] = {0,0,0,0}; // axes backward
6567
0
                axb[axis0] = 0;
6568
0
                axb[axis1] = 1;
6569
0
                axb[axis2] = 2;
6570
0
                axb[axis3] = 3;
6571
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_permute(ctx, grad, axb[0], axb[1], axb[2], axb[3]));
6572
0
            }
6573
0
        } break;
6574
0
        case GGML_OP_TRANSPOSE: {
6575
0
            if (src0_needs_grads) {
6576
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_transpose(ctx, grad));
6577
0
            }
6578
0
        } break;
6579
0
        case GGML_OP_GET_ROWS: {
6580
0
            if (src0_needs_grads) {
6581
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_get_rows_back(ctx, grad, src1, src0));
6582
0
            }
6583
0
            if (src1_needs_grads) {
6584
                // noop
6585
0
            }
6586
0
        } break;
6587
0
        case GGML_OP_DIAG_MASK_INF: {
6588
0
            if (src0_needs_grads) {
6589
                /* ggml_diag_mask_inf_impl() shouldn't be here */
6590
                /* ref:  https://github.com/ggml-org/llama.cpp/pull/4203#discussion_r1412377992 */
6591
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6592
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6593
0
            }
6594
0
        } break;
6595
0
        case GGML_OP_DIAG_MASK_ZERO: {
6596
0
            if (src0_needs_grads) {
6597
0
                const int n_past = ((const int32_t *) tensor->op_params)[0];
6598
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_diag_mask_zero_impl(ctx, grad, n_past, false));
6599
0
            }
6600
0
        } break;
6601
0
        case GGML_OP_SOFT_MAX: {
6602
0
            if (src0_needs_grads) {
6603
0
                float scale    = 1.0f;
6604
0
                float max_bias = 0.0f;
6605
6606
0
                memcpy(&scale,    (const float *) tensor->op_params + 0, sizeof(float));
6607
0
                memcpy(&max_bias, (const float *) tensor->op_params + 1, sizeof(float));
6608
6609
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_soft_max_ext_back(ctx, grad, tensor, scale, max_bias));
6610
0
            }
6611
0
            GGML_ASSERT((!src1 || !src1_needs_grads) && "backward pass for softmax mask not implemented");
6612
0
        } break;
6613
0
        case GGML_OP_ROPE: {
6614
0
            if (src0_needs_grads) {
6615
                //const int n_past = ((int32_t *) tensor->op_params)[0];
6616
0
                const int n_dims     = ((const int32_t *) tensor->op_params)[1];
6617
0
                const int mode       = ((const int32_t *) tensor->op_params)[2];
6618
                //const int n_ctx      = ((int32_t *) tensor->op_params)[3];
6619
0
                const int n_ctx_orig = ((const int32_t *) tensor->op_params)[4];
6620
0
                float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
6621
0
                int sections[4] = {0, 0, 0, 0};
6622
6623
0
                memcpy(&freq_base,   (const float *) tensor->op_params +  5, sizeof(float));
6624
0
                memcpy(&freq_scale,  (const float *) tensor->op_params +  6, sizeof(float));
6625
0
                memcpy(&ext_factor,  (const float *) tensor->op_params +  7, sizeof(float));
6626
0
                memcpy(&attn_factor, (const float *) tensor->op_params +  8, sizeof(float));
6627
0
                memcpy(&beta_fast,   (const float *) tensor->op_params +  9, sizeof(float));
6628
0
                memcpy(&beta_slow,   (const float *) tensor->op_params + 10, sizeof(float));
6629
0
                memcpy(&sections,                    tensor->op_params + 11, sizeof(sections));
6630
6631
0
                struct ggml_tensor * rope_back = grad->ne[2] == src1->ne[0] ?
6632
0
                    ggml_rope_ext_back(ctx, grad, src1, src2, n_dims,
6633
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow) :
6634
0
                    ggml_rope_multi_back(ctx, grad, src1, src2, n_dims, sections,
6635
0
                        mode, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow);
6636
0
                ggml_add_or_set(ctx, cgraph, isrc0, rope_back);
6637
0
            }
6638
0
            GGML_ASSERT((!src2 || !src2_needs_grads) && "gradients for freq factors not implemented");
6639
0
        } break;
6640
0
        case GGML_OP_IM2COL: {
6641
0
            if (src1_needs_grads) {
6642
0
                const int32_t s0    = ggml_get_op_params_i32(tensor, 0);
6643
0
                const int32_t s1    = ggml_get_op_params_i32(tensor, 1);
6644
0
                const int32_t p0    = ggml_get_op_params_i32(tensor, 2);
6645
0
                const int32_t p1    = ggml_get_op_params_i32(tensor, 3);
6646
0
                const int32_t d0    = ggml_get_op_params_i32(tensor, 4);
6647
0
                const int32_t d1    = ggml_get_op_params_i32(tensor, 5);
6648
0
                const bool    is_2D = ggml_get_op_params_i32(tensor, 6) == 1;
6649
6650
0
                ggml_add_or_set(ctx, cgraph, isrc1, ggml_im2col_back(ctx, grad, src0, src1->ne, s0, s1, p0, p1, d0, d1, is_2D));
6651
0
            }
6652
0
        } break;
6653
0
        case GGML_OP_POOL_2D: {
6654
0
            if (src0_needs_grads) {
6655
0
                const enum ggml_op_pool op = ggml_get_op_params_i32(tensor, 0);
6656
0
                const      int32_t      k0 = ggml_get_op_params_i32(tensor, 1);
6657
0
                const      int32_t      k1 = ggml_get_op_params_i32(tensor, 2);
6658
0
                const      int32_t      s0 = ggml_get_op_params_i32(tensor, 3);
6659
0
                const      int32_t      s1 = ggml_get_op_params_i32(tensor, 4);
6660
0
                const      int32_t      p0 = ggml_get_op_params_i32(tensor, 5);
6661
0
                const      int32_t      p1 = ggml_get_op_params_i32(tensor, 6);
6662
6663
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_pool_2d_back(ctx, grad, src0, op, k0, k1, s0, s1, p0, p1));
6664
0
            }
6665
0
        } break;
6666
0
        case GGML_OP_WIN_PART:
6667
0
        case GGML_OP_WIN_UNPART:
6668
0
        case GGML_OP_UNARY: {
6669
0
            switch (ggml_get_unary_op(tensor)) {
6670
0
                case GGML_UNARY_OP_ABS: {
6671
0
                    if (src0_needs_grads) {
6672
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_sgn(ctx, src0), grad));
6673
0
                    }
6674
0
                } break;
6675
0
                case GGML_UNARY_OP_SGN: {
6676
                    // noop
6677
0
                } break;
6678
0
                case GGML_UNARY_OP_NEG: {
6679
0
                    if (src0_needs_grads) {
6680
0
                        ggml_sub_or_set(ctx, cgraph, isrc0, grad);
6681
0
                    }
6682
0
                } break;
6683
0
                case GGML_UNARY_OP_STEP: {
6684
                    // noop
6685
0
                } break;
6686
0
                case GGML_UNARY_OP_RELU: {
6687
0
                    if (src0_needs_grads) {
6688
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, ggml_step(ctx, src0), grad));
6689
0
                    }
6690
0
                } break;
6691
0
                case GGML_UNARY_OP_SILU: {
6692
0
                    if (src0_needs_grads) {
6693
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, grad, src0));
6694
0
                    }
6695
0
                } break;
6696
0
                case GGML_UNARY_OP_EXP: {
6697
0
                    if (src0_needs_grads) {
6698
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, tensor, grad));
6699
0
                    }
6700
0
                } break;
6701
0
                case GGML_UNARY_OP_EXPM1: {
6702
0
                    if (src0_needs_grads) {
6703
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_exp(ctx, src0)));
6704
0
                    }
6705
0
                } break;
6706
0
                case GGML_UNARY_OP_SOFTPLUS: {
6707
0
                    if (src0_needs_grads) {
6708
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_mul(ctx, grad, ggml_sigmoid(ctx, src0)));
6709
0
                    }
6710
0
                } break;
6711
0
                default: {
6712
0
                    fprintf(stderr, "%s: unsupported unary op for backward pass: %s\n",
6713
0
                        __func__, ggml_unary_op_name(ggml_get_unary_op(tensor)));
6714
0
                    GGML_ABORT("fatal error");
6715
0
                } //break;
6716
0
            }
6717
0
        } break;
6718
0
        case GGML_OP_CROSS_ENTROPY_LOSS: {
6719
0
            if (src0_needs_grads) {
6720
0
                ggml_add_or_set(ctx, cgraph, isrc0, ggml_cross_entropy_loss_back(ctx, grad, src0, src1));
6721
0
            }
6722
0
            GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented");
6723
0
        } break;
6724
0
        case GGML_OP_GLU: {
6725
0
            switch (ggml_get_glu_op(tensor)) {
6726
0
                case GGML_GLU_OP_SWIGLU: {
6727
0
                    if (src0_needs_grads) {
6728
0
                        GGML_ASSERT(src1 && "backward pass only implemented for split swiglu");
6729
0
                        ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0));
6730
0
                    }
6731
0
                    if (src1_needs_grads) {
6732
0
                        ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad));
6733
0
                    }
6734
0
                } break;
6735
0
                default: {
6736
0
                    GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor)));
6737
0
                } //break;
6738
0
            }
6739
0
        } break;
6740
0
        case GGML_OP_NONE: {
6741
            // noop
6742
0
        } break;
6743
0
        case GGML_OP_COUNT:
6744
0
        default: {
6745
0
            GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op));
6746
0
        } //break;
6747
0
    }
6748
6749
0
    GGML_ASSERT(!src0_needs_grads || ggml_are_same_shape(src0, cgraph->grads[isrc0]));
6750
0
    GGML_ASSERT(!src1_needs_grads || ggml_are_same_shape(src1, cgraph->grads[isrc1]));
6751
0
    GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2]));
6752
0
}
6753
6754
0
static size_t ggml_visit_parents_graph(struct ggml_cgraph * cgraph, struct ggml_tensor * node, bool compute) {
6755
0
    if (node->op != GGML_OP_NONE && compute) {
6756
0
        node->flags |= GGML_TENSOR_FLAG_COMPUTE;
6757
0
    }
6758
6759
0
    const size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node);
6760
0
    GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL);
6761
6762
0
    if (ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) {
6763
        // already visited
6764
6765
0
        if (compute) {
6766
            // update the compute flag regardless
6767
0
            for (int i = 0; i < GGML_MAX_SRC; ++i) {
6768
0
                struct ggml_tensor * src = node->src[i];
6769
0
                if (src && ((src->flags & GGML_TENSOR_FLAG_COMPUTE) == 0)) {
6770
0
                    ggml_visit_parents_graph(cgraph, src, true);
6771
0
                }
6772
0
            }
6773
0
        }
6774
6775
0
        return node_hash_pos;
6776
0
    }
6777
6778
    // This is the first time we see this node in the current graph.
6779
0
    cgraph->visited_hash_set.keys[node_hash_pos] = node;
6780
0
    ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos);
6781
0
    cgraph->use_counts[node_hash_pos] = 0;
6782
6783
0
    for (int i = 0; i < GGML_MAX_SRC; ++i) {
6784
0
        const int k =
6785
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i :
6786
0
            (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) :
6787
0
            /* unknown order, just fall back to using i */ i;
6788
6789
0
        struct ggml_tensor * src = node->src[k];
6790
0
        if (src) {
6791
0
            const size_t src_hash_pos = ggml_visit_parents_graph(cgraph, src, compute);
6792
6793
            // Update the use count for this operand.
6794
0
            cgraph->use_counts[src_hash_pos]++;
6795
0
        }
6796
0
    }
6797
6798
0
    if (node->op == GGML_OP_NONE && !(node->flags & GGML_TENSOR_FLAG_PARAM)) {
6799
        // reached a leaf node, not part of the gradient graph (e.g. a constant)
6800
0
        GGML_ASSERT(cgraph->n_leafs < cgraph->size);
6801
6802
0
        if (strlen(node->name) == 0) {
6803
0
            ggml_format_name(node, "leaf_%d", cgraph->n_leafs);
6804
0
        }
6805
6806
0
        cgraph->leafs[cgraph->n_leafs] = node;
6807
0
        cgraph->n_leafs++;
6808
0
    } else {
6809
0
        GGML_ASSERT(cgraph->n_nodes < cgraph->size);
6810
6811
0
        if (strlen(node->name) == 0) {
6812
0
            ggml_format_name(node, "node_%d", cgraph->n_nodes);
6813
0
        }
6814
6815
0
        cgraph->nodes[cgraph->n_nodes] = node;
6816
0
        cgraph->n_nodes++;
6817
0
    }
6818
6819
0
    return node_hash_pos;
6820
0
}
6821
6822
0
static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand, bool compute) {
6823
0
    if (!expand) {
6824
        // TODO: this branch isn't accessible anymore, maybe move this to ggml_build_forward_expand
6825
0
        ggml_graph_clear(cgraph);
6826
0
    }
6827
6828
0
    const int n_old = cgraph->n_nodes;
6829
6830
0
    ggml_visit_parents_graph(cgraph, tensor, compute);
6831
6832
0
    const int n_new = cgraph->n_nodes - n_old;
6833
0
    GGML_PRINT_DEBUG("%s: visited %d new nodes\n", __func__, n_new);
6834
6835
0
    if (n_new > 0) {
6836
        // the last added node should always be starting point
6837
0
        GGML_ASSERT(cgraph->nodes[cgraph->n_nodes - 1] == tensor);
6838
0
    }
6839
0
}
6840
6841
struct ggml_tensor * ggml_build_forward_select(
6842
        struct ggml_cgraph  * cgraph,
6843
        struct ggml_tensor ** tensors,
6844
        int                   n_tensors,
6845
0
        int                   idx) {
6846
0
    GGML_ASSERT(idx >= 0 && idx < n_tensors);
6847
6848
0
    for (int i = 0; i < n_tensors; i++) {
6849
0
        ggml_build_forward_impl(cgraph, tensors[i], true, i == idx ? true : false);
6850
0
    }
6851
6852
0
    return tensors[idx];
6853
0
}
6854
6855
0
void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
6856
0
    ggml_build_forward_impl(cgraph, tensor, true, true);
6857
0
}
6858
6859
void ggml_build_backward_expand(
6860
        struct ggml_context *  ctx,
6861
        struct ggml_cgraph  *  cgraph,
6862
0
        struct ggml_tensor  ** grad_accs) {
6863
0
    GGML_ASSERT(cgraph->n_nodes > 0);
6864
0
    GGML_ASSERT(cgraph->grads);
6865
0
    GGML_ASSERT(cgraph->grad_accs);
6866
6867
0
    const int n_nodes_f = cgraph->n_nodes;
6868
6869
0
    memset(cgraph->grads,     0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6870
0
    memset(cgraph->grad_accs, 0, cgraph->visited_hash_set.size*sizeof(struct ggml_tensor *));
6871
0
    bool * grads_needed = calloc(cgraph->visited_hash_set.size, sizeof(bool));
6872
6873
0
    {
6874
0
        bool any_params = false;
6875
0
        bool any_loss   = false;
6876
0
        for (int i = 0; i < n_nodes_f; ++i) {
6877
0
            struct ggml_tensor * node = cgraph->nodes[i];
6878
0
            any_params = any_params || (node->flags & GGML_TENSOR_FLAG_PARAM);
6879
0
            any_loss   = any_loss   || (node->flags & GGML_TENSOR_FLAG_LOSS);
6880
0
        }
6881
0
        GGML_ASSERT(any_params && "no trainable parameters found, did you forget to call ggml_set_param?");
6882
0
        GGML_ASSERT(any_loss && "no training loss found, did you forget to call ggml_set_loss?");
6883
0
    }
6884
6885
0
    for (int i = 0; i < n_nodes_f; ++i) {
6886
0
        struct ggml_tensor * node = cgraph->nodes[i];
6887
6888
0
        if (node->type == GGML_TYPE_I32) {
6889
0
            continue;
6890
0
        }
6891
6892
0
        bool node_needs_grad = (node->flags & GGML_TENSOR_FLAG_PARAM) || (node->flags & GGML_TENSOR_FLAG_LOSS);
6893
0
        bool ignore_src[GGML_MAX_SRC] = {false};
6894
0
        switch (node->op) {
6895
            // gradients in node->src[0] for one reason or another have no effect on output gradients
6896
0
            case GGML_OP_IM2COL:      // only used for its shape
6897
0
            case GGML_OP_IM2COL_BACK: // same as IM2COL
6898
0
                ignore_src[0] = true;
6899
0
                break;
6900
0
            case GGML_OP_UNARY: {
6901
0
                const enum ggml_unary_op uop = ggml_get_unary_op(node);
6902
                // SGN and STEP unary ops are piecewise constant
6903
0
                if (uop == GGML_UNARY_OP_SGN || uop == GGML_UNARY_OP_STEP) {
6904
0
                    ignore_src[0] = true;
6905
0
                }
6906
0
            } break;
6907
6908
            // gradients in node->src[1] for one reason or another have no effect on output gradients
6909
0
            case GGML_OP_CPY:           // gradients in CPY target are irrelevant
6910
0
            case GGML_OP_GET_ROWS:      // row indices not differentiable
6911
0
            case GGML_OP_GET_ROWS_BACK: // same as for GET_ROWS
6912
0
            case GGML_OP_ROPE:          // positions not differentiable
6913
0
                ignore_src[1] = true;
6914
0
                break;
6915
6916
0
            default:
6917
0
                break;
6918
0
        }
6919
0
        for (int j = 0; j < GGML_MAX_SRC; ++j) {
6920
0
            if (!node->src[j] || ignore_src[j] || !grads_needed[ggml_hash_find(&cgraph->visited_hash_set, node->src[j])]) {
6921
0
                continue;
6922
0
            }
6923
0
            GGML_ASSERT(node->src[j]->type == GGML_TYPE_F32 || node->src[j]->type == GGML_TYPE_F16);
6924
0
            node_needs_grad = true;
6925
0
            break;
6926
0
        }
6927
0
        if (!node_needs_grad) {
6928
0
            continue;
6929
0
        }
6930
6931
        // inplace operations are currently not supported
6932
0
        GGML_ASSERT(!node->view_src || node->op == GGML_OP_CPY || node->op == GGML_OP_VIEW ||
6933
0
            node->op == GGML_OP_RESHAPE || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_TRANSPOSE);
6934
6935
0
        const size_t ihash = ggml_hash_find(&cgraph->visited_hash_set, node);
6936
0
        GGML_ASSERT(ihash != GGML_HASHSET_FULL);
6937
0
        GGML_ASSERT(ggml_bitset_get(cgraph->visited_hash_set.used, ihash));
6938
0
        if (grad_accs && grad_accs[i]) {
6939
0
            cgraph->grad_accs[ihash] = grad_accs[i];
6940
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6941
0
        } else if (node->flags & GGML_TENSOR_FLAG_LOSS) {
6942
            // loss tensors always need a gradient accumulator
6943
0
            cgraph->grad_accs[ihash] = ggml_new_tensor(ctx, GGML_TYPE_F32, GGML_MAX_DIMS, node->ne);
6944
0
            cgraph->grads[ihash]     = cgraph->grad_accs[ihash];
6945
0
        }
6946
0
        grads_needed[ihash] = true;
6947
0
    }
6948
6949
0
    for (int i = n_nodes_f - 1; i >= 0; --i) {
6950
        // inplace operations to add gradients are not created by ggml_compute_backward except for gradient accumulation
6951
        // use allocator to automatically make inplace operations
6952
0
        ggml_compute_backward(ctx, cgraph, i, grads_needed);
6953
0
    }
6954
6955
0
    free(grads_needed);
6956
0
}
6957
6958
0
static void * incr_ptr_aligned(void ** p, size_t size, size_t align) {
6959
0
    void * ptr = *p;
6960
0
    ptr = (void *) GGML_PAD((uintptr_t) ptr, align);
6961
0
    *p = (void *) ((char *) ptr + size);
6962
0
    return ptr;
6963
0
}
6964
6965
0
static size_t ggml_graph_nbytes(size_t size, bool grads) {
6966
0
    size_t hash_size = ggml_hash_size(size * 2);
6967
0
    void * p = 0;
6968
0
    incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1);
6969
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes
6970
0
    incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs
6971
0
    incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts
6972
0
    incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys
6973
0
    if (grads) {
6974
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads
6975
0
        incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grad_accs
6976
0
    }
6977
0
    incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
6978
6979
0
    size_t nbytes = (size_t) p;
6980
0
    return nbytes;
6981
0
}
6982
6983
0
size_t ggml_graph_overhead_custom(size_t size, bool grads) {
6984
0
    return GGML_OBJECT_SIZE + GGML_PAD(ggml_graph_nbytes(size, grads), GGML_MEM_ALIGN);
6985
0
}
6986
6987
0
size_t ggml_graph_overhead(void) {
6988
0
    return ggml_graph_overhead_custom(GGML_DEFAULT_GRAPH_SIZE, false);
6989
0
}
6990
6991
0
struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t size, bool grads) {
6992
0
    const size_t obj_size = ggml_graph_nbytes(size, grads);
6993
0
    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_GRAPH, obj_size);
6994
0
    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
6995
6996
    // the size of the hash table is doubled since it needs to hold both nodes and leafs
6997
0
    size_t hash_size = ggml_hash_size(size * 2);
6998
6999
0
    void * p = cgraph + 1;
7000
7001
0
    struct ggml_tensor ** nodes_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7002
0
    struct ggml_tensor ** leafs_ptr      =         incr_ptr_aligned(&p, size      * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7003
0
    int32_t             * use_counts_ptr =         incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t));
7004
0
    struct ggml_tensor ** hash_keys_ptr  =         incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *));
7005
0
    struct ggml_tensor ** grads_ptr      = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7006
0
    struct ggml_tensor ** grad_accs_ptr  = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL;
7007
7008
0
    ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t));
7009
7010
    // check that we allocated the correct amount of memory
7011
0
    assert(obj_size == (size_t)((char *)p - (char *)cgraph));
7012
7013
0
    *cgraph = (struct ggml_cgraph) {
7014
0
        /*.size         =*/ size,
7015
0
        /*.n_nodes      =*/ 0,
7016
0
        /*.n_leafs      =*/ 0,
7017
0
        /*.nodes        =*/ nodes_ptr,
7018
0
        /*.grads        =*/ grads_ptr,
7019
0
        /*.grad_accs    =*/ grad_accs_ptr,
7020
0
        /*.leafs        =*/ leafs_ptr,
7021
0
        /*.use_counts   =*/ use_counts_ptr,
7022
0
        /*.hash_table   =*/ { hash_size, hash_used, hash_keys_ptr },
7023
0
        /*.order        =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
7024
0
    };
7025
7026
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7027
0
    if (grads) {
7028
0
        memset(cgraph->grads,     0, hash_size*sizeof(struct ggml_tensor *));
7029
0
        memset(cgraph->grad_accs, 0, hash_size*sizeof(struct ggml_tensor *));
7030
0
    }
7031
7032
0
    return cgraph;
7033
0
}
7034
7035
0
struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
7036
0
    return ggml_new_graph_custom(ctx, GGML_DEFAULT_GRAPH_SIZE, false);
7037
0
}
7038
7039
0
struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) {
7040
0
    struct ggml_cgraph cgraph = {
7041
0
        /*.size             =*/ 0,
7042
0
        /*.n_nodes          =*/ i1 - i0,
7043
0
        /*.n_leafs          =*/ 0,
7044
0
        /*.nodes            =*/ cgraph0->nodes + i0,
7045
0
        /*.grads            =*/ NULL, // gradients would need visited_hash_set
7046
0
        /*.grad_accs        =*/ NULL,
7047
0
        /*.leafs            =*/ NULL,
7048
0
        /*.use_counts       =*/ cgraph0->use_counts,
7049
0
        /*.visited_hash_set =*/ cgraph0->visited_hash_set,
7050
0
        /*.order            =*/ cgraph0->order,
7051
0
    };
7052
7053
0
    return cgraph;
7054
0
}
7055
7056
0
void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) {
7057
0
    GGML_ASSERT(dst->size >= src->n_leafs);
7058
0
    GGML_ASSERT(dst->size >= src->n_nodes);
7059
0
    GGML_ASSERT(dst->visited_hash_set.size >= src->visited_hash_set.size);
7060
7061
0
    dst->n_leafs = src->n_leafs;
7062
0
    dst->n_nodes = src->n_nodes;
7063
0
    dst->order   = src->order;
7064
7065
0
    for (int i = 0; i < src->n_leafs; ++i) {
7066
0
        dst->leafs[i] = src->leafs[i];
7067
0
    }
7068
7069
0
    for (int i = 0; i < src->n_nodes; ++i) {
7070
0
        dst->nodes[i] = src->nodes[i];
7071
0
    }
7072
7073
0
    for (size_t i = 0; i < src->visited_hash_set.size; ++i) {
7074
        // copy all hashset keys (tensors) that are in use
7075
0
        if (ggml_bitset_get(src->visited_hash_set.used, i)) {
7076
0
            size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]);
7077
0
            dst->use_counts[new_hash_pos] = src->use_counts[i];
7078
0
        }
7079
0
    }
7080
7081
0
    if (dst->grads) {
7082
0
        memset(dst->grads,     0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7083
0
        memset(dst->grad_accs, 0, dst->visited_hash_set.size*sizeof(struct ggml_tensor *));
7084
0
    }
7085
0
    if (src->grads) {
7086
0
        GGML_ASSERT(dst->grads     != NULL);
7087
0
        GGML_ASSERT(dst->grad_accs != NULL);
7088
0
        for (int i = 0; i < src->n_nodes; ++i) {
7089
0
            const size_t igrad_src = ggml_hash_find(&src->visited_hash_set, src->nodes[i]);
7090
0
            const size_t igrad_dst = ggml_hash_find(&dst->visited_hash_set, dst->nodes[i]);
7091
7092
0
            GGML_ASSERT(igrad_src != GGML_HASHSET_FULL);
7093
0
            GGML_ASSERT(ggml_bitset_get(src->visited_hash_set.used, igrad_src));
7094
0
            GGML_ASSERT(igrad_dst != GGML_HASHSET_FULL);
7095
0
            GGML_ASSERT(ggml_bitset_get(dst->visited_hash_set.used, igrad_dst));
7096
7097
0
            dst->grads[igrad_dst]     = src->grads[igrad_src];
7098
0
            dst->grad_accs[igrad_dst] = src->grad_accs[igrad_src];
7099
0
        }
7100
0
    }
7101
0
}
7102
7103
0
struct ggml_cgraph * ggml_graph_dup(struct ggml_context * ctx, struct ggml_cgraph * cgraph, bool force_grads) {
7104
0
    struct ggml_cgraph * result = ggml_new_graph_custom(ctx, cgraph->size, cgraph->grads || force_grads);
7105
0
    ggml_graph_cpy(cgraph, result);
7106
0
    return result;
7107
0
}
7108
7109
0
struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
7110
0
    if (ggml_is_empty(tensor)) {
7111
0
        return tensor;
7112
0
    }
7113
0
    if (tensor->buffer) {
7114
0
        ggml_backend_tensor_memset(tensor, 0, 0, ggml_nbytes(tensor));
7115
0
    } else {
7116
0
        GGML_ASSERT(tensor->data);
7117
0
        memset(tensor->data, 0, ggml_nbytes(tensor));
7118
0
    }
7119
0
    return tensor;
7120
0
}
7121
7122
0
void ggml_graph_reset(struct ggml_cgraph * cgraph) {
7123
0
    if (!cgraph) {
7124
0
        return;
7125
0
    }
7126
0
    GGML_ASSERT(cgraph->grads != NULL);
7127
7128
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7129
0
        struct ggml_tensor * node     = cgraph->nodes[i];
7130
0
        struct ggml_tensor * grad_acc = ggml_graph_get_grad_acc(cgraph, node);
7131
7132
0
        if (node->op == GGML_OP_OPT_STEP_ADAMW) {
7133
            // clear momenta
7134
0
            ggml_set_zero(node->src[2]);
7135
0
            ggml_set_zero(node->src[3]);
7136
0
        }
7137
7138
        // initial gradients of loss should be 1, 0 otherwise
7139
0
        if (grad_acc) {
7140
0
            if (node->flags & GGML_TENSOR_FLAG_LOSS) {
7141
0
                GGML_ASSERT(grad_acc->type == GGML_TYPE_F32);
7142
0
                GGML_ASSERT(ggml_is_scalar(grad_acc));
7143
7144
0
                const float onef = 1.0f;
7145
0
                if (grad_acc->buffer) {
7146
0
                    ggml_backend_tensor_set(grad_acc, &onef, 0, sizeof(float));
7147
0
                } else {
7148
0
                    GGML_ASSERT(grad_acc->data);
7149
0
                    *((float *) grad_acc->data) = onef;
7150
0
                }
7151
0
            } else {
7152
0
                ggml_set_zero(grad_acc);
7153
0
            }
7154
0
        }
7155
0
    }
7156
0
}
7157
7158
0
void ggml_graph_clear(struct ggml_cgraph * cgraph) {
7159
0
    cgraph->n_leafs = 0;
7160
0
    cgraph->n_nodes = 0;
7161
0
    ggml_hash_set_reset(&cgraph->visited_hash_set);
7162
0
}
7163
7164
0
int ggml_graph_size(struct ggml_cgraph * cgraph) {
7165
0
    return cgraph->size;
7166
0
}
7167
7168
0
struct ggml_tensor * ggml_graph_node(struct ggml_cgraph * cgraph, int i) {
7169
0
    if (i < 0) {
7170
0
        GGML_ASSERT(cgraph->n_nodes + i >= 0);
7171
0
        return cgraph->nodes[cgraph->n_nodes + i];
7172
0
    }
7173
7174
0
    GGML_ASSERT(i < cgraph->n_nodes);
7175
0
    return cgraph->nodes[i];
7176
0
}
7177
7178
0
struct ggml_tensor ** ggml_graph_nodes(struct ggml_cgraph * cgraph) {
7179
0
    return cgraph->nodes;
7180
0
}
7181
7182
0
int ggml_graph_n_nodes(struct ggml_cgraph * cgraph) {
7183
0
    return cgraph->n_nodes;
7184
0
}
7185
7186
0
void ggml_graph_add_node(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor) {
7187
0
    GGML_ASSERT(cgraph->size > cgraph->n_nodes);
7188
0
    cgraph->nodes[cgraph->n_nodes] = tensor;
7189
0
    cgraph->n_nodes++;
7190
0
}
7191
7192
0
struct ggml_tensor * ggml_graph_get_tensor(const struct ggml_cgraph * cgraph, const char * name) {
7193
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7194
0
        struct ggml_tensor * leaf = cgraph->leafs[i];
7195
7196
0
        if (strcmp(leaf->name, name) == 0) {
7197
0
            return leaf;
7198
0
        }
7199
0
    }
7200
7201
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7202
0
        struct ggml_tensor * node = cgraph->nodes[i];
7203
7204
0
        if (strcmp(node->name, name) == 0) {
7205
0
            return node;
7206
0
        }
7207
0
    }
7208
7209
0
    return NULL;
7210
0
}
7211
7212
0
struct ggml_tensor * ggml_graph_get_grad(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7213
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7214
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grads ? cgraph->grads[igrad] : NULL;
7215
0
}
7216
7217
0
struct ggml_tensor * ggml_graph_get_grad_acc(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7218
0
    const size_t igrad = ggml_hash_find(&cgraph->visited_hash_set, node);
7219
0
    return igrad != GGML_HASHSET_FULL && ggml_bitset_get(cgraph->visited_hash_set.used, igrad) && cgraph->grad_accs ? cgraph->grad_accs[igrad] : NULL;
7220
0
}
7221
7222
0
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
7223
0
    GGML_LOG_INFO("=== GRAPH ===\n");
7224
7225
0
    GGML_LOG_INFO("n_nodes = %d\n", cgraph->n_nodes);
7226
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7227
0
        struct ggml_tensor * node = cgraph->nodes[i];
7228
7229
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s\n",
7230
0
                i,
7231
0
                node->ne[0], node->ne[1], node->ne[2],
7232
0
                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" :
7233
0
                      ggml_graph_get_grad(cgraph, node) ? "g" : " ");
7234
0
    }
7235
7236
0
    GGML_LOG_INFO("n_leafs = %d\n", cgraph->n_leafs);
7237
0
    for (int i = 0; i < cgraph->n_leafs; i++) {
7238
0
        struct ggml_tensor * node = cgraph->leafs[i];
7239
7240
0
        GGML_LOG_INFO(" - %3d: [ %5" PRId64 ", %5" PRId64 "] %8s %16s\n",
7241
0
                i,
7242
0
                node->ne[0], node->ne[1],
7243
0
                ggml_op_name(node->op),
7244
0
                ggml_get_name(node));
7245
0
    }
7246
7247
0
    GGML_LOG_INFO("========================================\n");
7248
0
}
7249
7250
static int ggml_node_list_find_tensor(const struct ggml_cgraph * cgraph,
7251
                                      const int *                idxs,
7252
                                      int                        count,
7253
0
                                      const struct ggml_tensor * tensor) {
7254
0
    GGML_ASSERT(cgraph && idxs);
7255
0
    for (int i = 0; i < count; ++i) {
7256
0
        const int node_idx = idxs[i];
7257
7258
0
        if (node_idx >= cgraph->n_nodes) {
7259
0
            return -1;
7260
0
        }
7261
0
        if (cgraph->nodes[node_idx] == tensor) {
7262
0
            return i;
7263
0
        }
7264
0
    }
7265
0
    return -1;
7266
0
}
7267
7268
bool ggml_can_fuse_subgraph_ext(const struct ggml_cgraph * cgraph,
7269
                                const int *                node_idxs,
7270
                                int                        count,
7271
                                const enum ggml_op *       ops,
7272
                                const int *                outputs,
7273
0
                                int                        num_outputs) {
7274
0
    GGML_ASSERT(outputs && num_outputs > 0);
7275
7276
0
    for (int i = 0; i < count; ++i) {
7277
0
        if (node_idxs[i] >= cgraph->n_nodes) {
7278
0
            return false;
7279
0
        }
7280
7281
0
        const struct ggml_tensor * node = cgraph->nodes[node_idxs[i]];
7282
7283
0
        if (node->op != ops[i]) {
7284
0
            return false;
7285
0
        }
7286
7287
0
        if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) == 0) {
7288
0
            return false;
7289
0
        }
7290
7291
0
        if (ggml_node_list_find_tensor(cgraph, outputs, num_outputs, node) != -1) {
7292
0
            continue;
7293
0
        }
7294
7295
0
        if (node->flags & GGML_TENSOR_FLAG_OUTPUT) {
7296
0
            return false;
7297
0
        }
7298
7299
0
        int subgraph_uses = 0;
7300
0
        for (int j = i + 1; j < count; ++j) {
7301
0
            const struct ggml_tensor * other_node = cgraph->nodes[node_idxs[j]];
7302
0
            for (int src_idx = 0; src_idx < GGML_MAX_SRC; src_idx++) {
7303
0
                if (other_node->src[src_idx] == node) {
7304
0
                    subgraph_uses++;
7305
0
                }
7306
0
            }
7307
0
        }
7308
7309
0
        if (subgraph_uses != ggml_node_get_use_count(cgraph, node_idxs[i])) {
7310
0
            return false;
7311
0
        }
7312
7313
        // if node is a view, check if the view_src and all it's parent view_srcs are within the subgraph
7314
0
        struct ggml_tensor * view_src = node->view_src;
7315
0
        while (view_src) {
7316
0
            if (ggml_node_list_find_tensor(cgraph, node_idxs, count, view_src) == -1) {
7317
0
                return false;
7318
0
            }
7319
0
            view_src = view_src->view_src;
7320
0
        }
7321
0
    }
7322
7323
0
    return true;
7324
0
}
7325
7326
// check if node is part of the graph
7327
0
static bool ggml_graph_find(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7328
0
    if (cgraph == NULL) {
7329
0
        return true;
7330
0
    }
7331
7332
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7333
0
        if (cgraph->nodes[i] == node) {
7334
0
            return true;
7335
0
        }
7336
0
    }
7337
7338
0
    return false;
7339
0
}
7340
7341
0
static struct ggml_tensor * ggml_graph_get_parent(const struct ggml_cgraph * cgraph, const struct ggml_tensor * node) {
7342
0
    for (int i = 0; i < cgraph->n_nodes; i++) {
7343
0
        struct ggml_tensor * parent = cgraph->nodes[i];
7344
0
        struct ggml_tensor * grad = ggml_graph_get_grad(cgraph, parent);
7345
7346
0
        if (grad == node) {
7347
0
            return parent;
7348
0
        }
7349
0
    }
7350
7351
0
    return NULL;
7352
0
}
7353
7354
0
static void ggml_graph_dump_dot_node_edge(FILE * fp, const struct ggml_cgraph * gb, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7355
0
    struct ggml_tensor * gparent = ggml_graph_get_parent(gb, node);
7356
0
    struct ggml_tensor * gparent0 = ggml_graph_get_parent(gb, parent);
7357
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ arrowhead = %s; style = %s; label = \"%s\"; ]\n",
7358
0
            gparent0 ? (void *) gparent0 : (void *) parent,
7359
0
            gparent ? (void *) gparent : (void *) node,
7360
0
            gparent ? "empty" : "vee",
7361
0
            gparent ? "dashed" : "solid",
7362
0
            label);
7363
0
}
7364
7365
0
static void ggml_graph_dump_dot_leaf_edge(FILE * fp, struct ggml_tensor * node, struct ggml_tensor * parent, const char * label)  {
7366
0
    fprintf(fp, "  \"%p\" -> \"%p\" [ label = \"%s\"; ]\n",
7367
0
            (void *) parent,
7368
0
            (void *) node,
7369
0
            label);
7370
0
}
7371
7372
0
void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph * cgraph, const char * filename) {
7373
0
    char color[16];
7374
7375
0
    FILE * fp = ggml_fopen(filename, "w");
7376
0
    GGML_ASSERT(fp);
7377
7378
0
    fprintf(fp, "digraph G {\n");
7379
0
    fprintf(fp, "  newrank = true;\n");
7380
0
    fprintf(fp, "  rankdir = TB;\n");
7381
7382
0
    for (int i = 0; i < gb->n_nodes; i++) {
7383
0
        struct ggml_tensor * node = gb->nodes[i];
7384
0
        struct ggml_tensor * grad = ggml_graph_get_grad(gb, node);
7385
7386
0
        if (ggml_graph_get_parent(gb, node) != NULL) {
7387
0
            continue;
7388
0
        }
7389
7390
0
        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
7391
0
            snprintf(color, sizeof(color), "yellow");
7392
0
        } else if (grad) {
7393
0
            if (ggml_graph_find(cgraph, node)) {
7394
0
                snprintf(color, sizeof(color), "green");
7395
0
            } else {
7396
0
                snprintf(color, sizeof(color), "lightblue");
7397
0
            }
7398
0
        } else {
7399
0
            snprintf(color, sizeof(color), "white");
7400
0
        }
7401
7402
0
        fprintf(fp, "  \"%p\" [ "
7403
0
                    "style = filled; fillcolor = %s; shape = record; "
7404
0
                    "label=\"",
7405
0
                (void *) node, color);
7406
7407
0
        if (strlen(node->name) > 0) {
7408
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7409
0
        } else {
7410
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7411
0
        }
7412
7413
0
        if (ggml_is_matrix(node)) {
7414
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], ggml_op_symbol(node->op));
7415
0
        } else {
7416
0
            fprintf(fp, "%d [%" PRId64 ", %" PRId64 ", %" PRId64 "] | <x>%s", i, node->ne[0], node->ne[1], node->ne[2], ggml_op_symbol(node->op));
7417
0
        }
7418
7419
0
        if (grad) {
7420
0
            fprintf(fp, " | <g>%s\"; ]\n", ggml_op_symbol(grad->op));
7421
0
        } else {
7422
0
            fprintf(fp, "\"; ]\n");
7423
0
        }
7424
0
    }
7425
7426
0
    for (int i = 0; i < gb->n_leafs; i++) {
7427
0
        struct ggml_tensor * node = gb->leafs[i];
7428
7429
0
        snprintf(color, sizeof(color), "pink");
7430
7431
0
        fprintf(fp, "  \"%p\" [ "
7432
0
                    "style = filled; fillcolor = %s; shape = record; "
7433
0
                    "label=\"<x>",
7434
0
                (void *) node, color);
7435
7436
0
        if (strlen(node->name) > 0) {
7437
0
            fprintf(fp, "%s (%s)|", node->name, ggml_type_name(node->type));
7438
0
        } else {
7439
0
            fprintf(fp, "(%s)|", ggml_type_name(node->type));
7440
0
        }
7441
7442
0
        fprintf(fp, "CONST %d [%" PRId64 ", %" PRId64 "]", i, node->ne[0], node->ne[1]);
7443
0
        if (ggml_nelements(node) < 5 && node->data != NULL) {
7444
0
            fprintf(fp, " | (");
7445
0
            for (int j = 0; j < ggml_nelements(node); j++) {
7446
                // FIXME: use ggml-backend to obtain the tensor data
7447
                //if (node->type == GGML_TYPE_I8 || node->type == GGML_TYPE_I16 || node->type == GGML_TYPE_I32) {
7448
                //    fprintf(fp, "%d", ggml_get_i32_1d(node, j));
7449
                //}
7450
                //else if (node->type == GGML_TYPE_F32 ||
7451
                //         node->type == GGML_TYPE_F16 ||
7452
                //         node->type == GGML_TYPE_BF16) {
7453
                //    fprintf(fp, "%.1e", (double)ggml_get_f32_1d(node, j));
7454
                //}
7455
                //else
7456
0
                {
7457
0
                    fprintf(fp, "#");
7458
0
                }
7459
0
                if (j < ggml_nelements(node) - 1) {
7460
0
                    fprintf(fp, ", ");
7461
0
                }
7462
0
            }
7463
0
            fprintf(fp, ")");
7464
0
        }
7465
0
        fprintf(fp, "\"; ]\n");
7466
0
    }
7467
7468
0
    for (int i = 0; i < gb->n_nodes; i++) {
7469
0
        struct ggml_tensor * node = gb->nodes[i];
7470
7471
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7472
0
            if (node->src[j]) {
7473
0
                char label[16];
7474
0
                snprintf(label, sizeof(label), "src %d", j);
7475
0
                ggml_graph_dump_dot_node_edge(fp, gb, node, node->src[j], label);
7476
0
            }
7477
0
        }
7478
0
    }
7479
7480
0
    for (int i = 0; i < gb->n_leafs; i++) {
7481
0
        struct ggml_tensor * node = gb->leafs[i];
7482
7483
0
        for (int j = 0; j < GGML_MAX_SRC; j++) {
7484
0
            if (node->src[j]) {
7485
0
                char label[16];
7486
0
                snprintf(label, sizeof(label), "src %d", j);
7487
0
                ggml_graph_dump_dot_leaf_edge(fp, node, node->src[j], label);
7488
0
            }
7489
0
        }
7490
0
    }
7491
7492
0
    fprintf(fp, "}\n");
7493
7494
0
    fclose(fp);
7495
7496
0
    GGML_LOG_INFO("%s: dot -Tpng %s -o %s.png && open %s.png\n", __func__, filename, filename, filename);
7497
0
}
7498
7499
////////////////////////////////////////////////////////////////////////////////
7500
7501
0
void ggml_set_input(struct ggml_tensor * tensor) {
7502
0
    tensor->flags |= GGML_TENSOR_FLAG_INPUT;
7503
0
}
7504
7505
0
void ggml_set_output(struct ggml_tensor * tensor) {
7506
0
    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT;
7507
0
}
7508
7509
0
void ggml_set_param(struct ggml_tensor * tensor) {
7510
0
    GGML_ASSERT(tensor->op == GGML_OP_NONE);
7511
0
    tensor->flags |= GGML_TENSOR_FLAG_PARAM;
7512
0
}
7513
7514
0
void ggml_set_loss(struct ggml_tensor * tensor) {
7515
0
    GGML_ASSERT(ggml_is_scalar(tensor));
7516
0
    GGML_ASSERT(tensor->type == GGML_TYPE_F32);
7517
0
    tensor->flags |= GGML_TENSOR_FLAG_LOSS;
7518
0
}
7519
7520
////////////////////////////////////////////////////////////////////////////////
7521
7522
0
void ggml_quantize_init(enum ggml_type type) {
7523
0
    ggml_critical_section_start();
7524
7525
0
    switch (type) {
7526
0
        case GGML_TYPE_IQ2_XXS:
7527
0
        case GGML_TYPE_IQ2_XS:
7528
0
        case GGML_TYPE_IQ2_S:
7529
0
        case GGML_TYPE_IQ1_S:
7530
0
        case GGML_TYPE_IQ1_M:   iq2xs_init_impl(type); break;
7531
0
        case GGML_TYPE_IQ3_XXS: iq3xs_init_impl(256); break;
7532
0
        case GGML_TYPE_IQ3_S:   iq3xs_init_impl(512); break;
7533
0
        default: // nothing
7534
0
            break;
7535
0
    }
7536
7537
0
    ggml_critical_section_end();
7538
0
}
7539
7540
4.31k
void ggml_quantize_free(void) {
7541
4.31k
    ggml_critical_section_start();
7542
7543
4.31k
    iq2xs_free_impl(GGML_TYPE_IQ2_XXS);
7544
4.31k
    iq2xs_free_impl(GGML_TYPE_IQ2_XS);
7545
4.31k
    iq2xs_free_impl(GGML_TYPE_IQ2_S);
7546
4.31k
    iq2xs_free_impl(GGML_TYPE_IQ1_S);
7547
4.31k
    iq2xs_free_impl(GGML_TYPE_IQ1_M);
7548
4.31k
    iq3xs_free_impl(256);
7549
4.31k
    iq3xs_free_impl(512);
7550
7551
4.31k
    ggml_critical_section_end();
7552
4.31k
}
7553
7554
0
bool ggml_quantize_requires_imatrix(enum ggml_type type) {
7555
0
    return
7556
0
        type == GGML_TYPE_IQ2_XXS ||
7557
0
        type == GGML_TYPE_IQ2_XS  ||
7558
0
        type == GGML_TYPE_IQ1_S;//   ||
7559
        //type == GGML_TYPE_IQ1_M;
7560
0
}
7561
7562
size_t ggml_quantize_chunk(
7563
        enum ggml_type   type,
7564
           const float * src,
7565
                  void * dst,
7566
               int64_t   start,
7567
               int64_t   nrows,
7568
               int64_t   n_per_row,
7569
0
           const float * imatrix) {
7570
0
    const int64_t n = (int64_t) nrows * n_per_row;
7571
7572
0
    if (ggml_quantize_requires_imatrix(type)) {
7573
0
        GGML_ASSERT(imatrix != NULL);
7574
0
    }
7575
7576
0
    GGML_ASSERT(start % type_traits[type].blck_size == 0);
7577
0
    GGML_ASSERT(start % n_per_row == 0);
7578
7579
0
    ggml_quantize_init(type); // this is noop if already initialized
7580
7581
0
    const size_t start_row = start / n_per_row;
7582
0
    const size_t row_size  = ggml_row_size(type, n_per_row);
7583
7584
0
    size_t result = 0;
7585
7586
0
    switch (type) {
7587
0
        case GGML_TYPE_Q4_0:    result = quantize_q4_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7588
0
        case GGML_TYPE_Q4_1:    result = quantize_q4_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7589
0
        case GGML_TYPE_Q5_0:    result = quantize_q5_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7590
0
        case GGML_TYPE_Q5_1:    result = quantize_q5_1(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7591
0
        case GGML_TYPE_Q8_0:    result = quantize_q8_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7592
0
        case GGML_TYPE_MXFP4:   result = quantize_mxfp4(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7593
0
        case GGML_TYPE_Q2_K:    result = quantize_q2_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7594
0
        case GGML_TYPE_Q3_K:    result = quantize_q3_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7595
0
        case GGML_TYPE_Q4_K:    result = quantize_q4_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7596
0
        case GGML_TYPE_Q5_K:    result = quantize_q5_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7597
0
        case GGML_TYPE_Q6_K:    result = quantize_q6_K(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7598
0
        case GGML_TYPE_TQ1_0:   result = quantize_tq1_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7599
0
        case GGML_TYPE_TQ2_0:   result = quantize_tq2_0(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7600
0
        case GGML_TYPE_IQ2_XXS: result = quantize_iq2_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7601
0
        case GGML_TYPE_IQ2_XS:  result = quantize_iq2_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7602
0
        case GGML_TYPE_IQ3_XXS: result = quantize_iq3_xxs(src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7603
0
        case GGML_TYPE_IQ3_S:   result = quantize_iq3_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7604
0
        case GGML_TYPE_IQ2_S:   result = quantize_iq2_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7605
0
        case GGML_TYPE_IQ1_S:   result = quantize_iq1_s  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7606
0
        case GGML_TYPE_IQ1_M:   result = quantize_iq1_m  (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7607
0
        case GGML_TYPE_IQ4_NL:  result = quantize_iq4_nl (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7608
0
        case GGML_TYPE_IQ4_XS:  result = quantize_iq4_xs (src + start, (char *) dst + start_row * row_size, nrows, n_per_row, imatrix); break;
7609
0
        case GGML_TYPE_F16:
7610
0
            {
7611
0
                size_t elemsize = sizeof(ggml_fp16_t);
7612
0
                ggml_fp32_to_fp16_row(src + start, (ggml_fp16_t *)dst + start, n);
7613
0
                result = n * elemsize;
7614
0
            } break;
7615
0
        case GGML_TYPE_BF16:
7616
0
            {
7617
0
                size_t elemsize = sizeof(ggml_bf16_t);
7618
0
                ggml_fp32_to_bf16_row_ref(src + start, (ggml_bf16_t *)dst + start, n);
7619
0
                result = n * elemsize;
7620
0
            } break;
7621
0
        case GGML_TYPE_F32:
7622
0
            {
7623
0
                size_t elemsize = sizeof(float);
7624
0
                result = n * elemsize;
7625
0
                memcpy((uint8_t *)dst + start * elemsize, src + start, result);
7626
0
            } break;
7627
0
        default:
7628
0
            assert(false);
7629
0
    }
7630
7631
0
    GGML_ASSERT(result == nrows * row_size);
7632
7633
0
    return result;
7634
0
}
7635
7636
////////////////////////////////////////////////////////////////////////////////
7637
7638
0
void ggml_log_get(ggml_log_callback * log_callback, void ** user_data) {
7639
0
    *log_callback = g_logger_state.log_callback;
7640
0
    *user_data    = g_logger_state.log_callback_user_data;
7641
0
}
7642
7643
0
void ggml_log_set(ggml_log_callback log_callback, void * user_data) {
7644
0
    g_logger_state.log_callback = log_callback ? log_callback : ggml_log_callback_default;
7645
0
    g_logger_state.log_callback_user_data = user_data;
7646
0
}
7647
7648
0
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
7649
0
    p->n_threads  = n_threads;
7650
0
    p->prio       = 0;     // default priority (usually means normal or inherited)
7651
0
    p->poll       = 50;    // hybrid-polling enabled
7652
0
    p->strict_cpu = false; // no strict placement (all threads share same cpumask)
7653
0
    p->paused     = false; // threads are ready to go
7654
0
    memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
7655
0
}
7656
7657
0
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
7658
0
    struct ggml_threadpool_params p;
7659
0
    ggml_threadpool_params_init(&p, n_threads);
7660
0
    return p;
7661
0
}
7662
7663
0
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
7664
0
    if (p0->n_threads      != p1->n_threads  )    return false;
7665
0
    if (p0->prio           != p1->prio       )    return false;
7666
0
    if (p0->poll           != p1->poll       )    return false;
7667
0
    if (p0->strict_cpu     != p1->strict_cpu )    return false;
7668
0
    return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
7669
0
}