Coverage Report

Created: 2025-11-24 06:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/ggml/src/ggml-quants.c
Line
Count
Source
1
#define GGML_COMMON_IMPL_C
2
#include "ggml-common.h"
3
4
#include "ggml-quants.h"
5
#include "ggml-impl.h"
6
#include "ggml-cpu/ggml-cpu-impl.h"
7
#include "ggml-cpu.h"
8
9
#include <math.h>
10
#include <string.h>
11
#include <assert.h>
12
#include <float.h>
13
#include <stdlib.h> // for qsort
14
#include <stdio.h>  // for GGML_ASSERT
15
16
0
#define GROUP_MAX_EPS 1e-15f
17
0
#define GROUP_MAX_EPS_IQ3_XXS 1e-8f
18
0
#define GROUP_MAX_EPS_IQ2_S 1e-8f
19
0
#define GROUP_MAX_EPS_IQ1_M 1e-7f
20
0
#define GROUP_MAX_EPS_IQ1_S 1e-12f
21
22
#define UNUSED GGML_UNUSED
23
24
0
// Returns the index of the entry of val[0..n-1] that is closest to x.
// val is expected to be sorted in ascending order (the bisection below relies on it).
// Inputs at or beyond either end map to index 0 or n-1; when x is exactly halfway
// between two neighbouring entries the upper index is returned.
static inline int best_index_int8(int n, const int8_t * val, float x) {
    if (x <= val[0]) {
        return 0;
    }
    if (x >= val[n-1]) {
        return n-1;
    }

    // shrink the bracket [lo, hi] until the two ends are adjacent
    int lo = 0;
    int hi = n-1;
    while (hi - lo > 1) {
        const int mid = (lo + hi)/2;
        if (x < val[mid]) {
            hi = mid;
        } else {
            lo = mid;
        }
    }

    // pick whichever of the two bracketing entries is nearer
    return x - val[hi-1] < val[hi] - x ? hi-1 : hi;
}
34
35
// reference implementation for deterministic creation of model files
36
0
// Quantizes k floats from x into k/QK4_0 block_q4_0 blocks in y.
// k must be a multiple of QK4_0. Per block, the scale is the signed value of largest
// magnitude divided by -8; each element is scaled, offset by 8.5, truncated and capped
// at 15, and two 4-bit codes are packed per byte (first half of the block in the low
// nibble, second half in the high nibble).
void quantize_row_q4_0_ref(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*qk;

        // find the element with the largest magnitude, keeping its sign
        float amax    = 0.0f;
        float extreme = 0.0f;
        for (int j = 0; j < qk; j++) {
            const float v  = src[j];
            const float av = fabsf(v);
            if (amax < av) {
                amax    = av;
                extreme = v;
            }
        }

        const float d  = extreme / -8;
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        for (int j = 0; j < qk/2; ++j) {
            const float s_lo = src[0    + j]*id;
            const float s_hi = src[qk/2 + j]*id;

            const uint8_t q_lo = MIN(15, (int8_t)(s_lo + 8.5f));
            const uint8_t q_hi = MIN(15, (int8_t)(s_hi + 8.5f));

            y[ib].qs[j] = q_lo | (q_hi << 4);
        }
    }
}
72
73
0
// Quantizes k floats from x into k/QK4_1 block_q4_1 blocks in y.
// k must be a multiple of QK4_1. Each block stores the minimum m and a step
// d = (max - min)/15; elements become 4-bit codes of (v - min)/d, rounded by adding 0.5
// and truncating, capped at 15. Two codes are packed per byte (first half of the block
// in the low nibble, second half in the high nibble).
void quantize_row_q4_1_ref(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t k) {
    const int qk = QK4_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*qk;

        float vmin = FLT_MAX;
        float vmax = -FLT_MAX;
        for (int j = 0; j < qk; j++) {
            const float v = src[j];

            if (v < vmin) vmin = v;
            if (v > vmax) vmax = v;
        }

        const float d  = (vmax - vmin) / ((1 << 4) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);
        y[ib].m = GGML_FP32_TO_FP16(vmin);

        for (int j = 0; j < qk/2; ++j) {
            const float s_lo = (src[0    + j] - vmin)*id;
            const float s_hi = (src[qk/2 + j] - vmin)*id;

            const uint8_t q_lo = MIN(15, (int8_t)(s_lo + 0.5f));
            const uint8_t q_hi = MIN(15, (int8_t)(s_hi + 0.5f));

            y[ib].qs[j] = q_lo | (q_hi << 4);
        }
    }
}
109
110
0
// Quantizes k floats from x into k/QK5_0 block_q5_0 blocks in y.
// k must be a multiple of QK5_0. Per block, the scale is the signed value of largest
// magnitude divided by -16; each element becomes a 5-bit code (scaled, offset by 16.5,
// truncated, capped at 31). The low 4 bits of two codes share a byte of qs, and the
// 5th bits are gathered into a 32-bit mask copied into qh.
void quantize_row_q5_0_ref(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*qk;

        // find the element with the largest magnitude, keeping its sign
        float amax    = 0.0f;
        float extreme = 0.0f;
        for (int j = 0; j < qk; j++) {
            const float v  = src[j];
            const float av = fabsf(v);
            if (amax < av) {
                amax    = av;
                extreme = v;
            }
        }

        const float d  = extreme / -16;
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        uint32_t high_bits = 0;

        for (int j = 0; j < qk/2; ++j) {
            const float s_lo = src[0    + j]*id;
            const float s_hi = src[qk/2 + j]*id;

            const uint8_t q_lo = MIN(31, (int8_t)(s_lo + 16.5f));
            const uint8_t q_hi = MIN(31, (int8_t)(s_hi + 16.5f));

            y[ib].qs[j] = (q_lo & 0x0F) | ((q_hi & 0x0F) << 4);

            // bit j holds the 5th bit of the first-half code, bit j + qk/2 that of the second-half code
            high_bits |= ((q_lo & 0x10u) >> 4) << (j + 0);
            high_bits |= ((q_hi & 0x10u) >> 4) << (j + qk/2);
        }

        memcpy(&y[ib].qh, &high_bits, sizeof(high_bits));
    }
}
153
154
0
// Quantizes k floats from x into k/QK5_1 block_q5_1 blocks in y.
// k must be a multiple of QK5_1. Each block stores the minimum m and a step
// d = (max - min)/31; elements become 5-bit codes of (v - min)/d, rounded by adding 0.5
// and truncating. The low 4 bits of two codes share a byte of qs, and the 5th bits are
// gathered into a 32-bit mask copied into qh.
void quantize_row_q5_1_ref(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t k) {
    const int qk = QK5_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*qk;

        float vmin = FLT_MAX;
        float vmax = -FLT_MAX;
        for (int j = 0; j < qk; j++) {
            const float v = src[j];

            if (v < vmin) vmin = v;
            if (v > vmax) vmax = v;
        }

        const float d  = (vmax - vmin) / ((1 << 5) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);
        y[ib].m = GGML_FP32_TO_FP16(vmin);

        uint32_t high_bits = 0;

        for (int j = 0; j < qk/2; ++j) {
            const float s_lo = (src[0    + j] - vmin)*id;
            const float s_hi = (src[qk/2 + j] - vmin)*id;

            const uint8_t q_lo = (uint8_t)(s_lo + 0.5f);
            const uint8_t q_hi = (uint8_t)(s_hi + 0.5f);

            y[ib].qs[j] = (q_lo & 0x0F) | ((q_hi & 0x0F) << 4);

            // bit j holds the 5th bit of the first-half code, bit j + qk/2 that of the second-half code
            high_bits |= ((q_lo & 0x10u) >> 4) << (j + 0);
            high_bits |= ((q_hi & 0x10u) >> 4) << (j + qk/2);
        }

        memcpy(&y[ib].qh, &high_bits, sizeof(y[ib].qh));
    }
}
197
198
// reference implementation for deterministic creation of model files
199
0
// Quantizes k floats from x into k/QK8_0 block_q8_0 blocks in y.
// k must be a multiple of QK8_0. Per block, d = max|v| / 127 and each element is
// stored as roundf(v / d) (or 0 when d is 0).
void quantize_row_q8_0_ref(const float * GGML_RESTRICT x, block_q8_0 * GGML_RESTRICT y, int64_t k) {
    assert(k % QK8_0 == 0);
    const int nb = k / QK8_0;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*QK8_0;

        float amax = 0.0f; // largest magnitude in the block
        for (int j = 0; j < QK8_0; j++) {
            const float v = src[j];
            amax = MAX(amax, fabsf(v));
        }

        const float d  = amax / ((1 << 7) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        for (int j = 0; j < QK8_0; ++j) {
            const float scaled = src[j]*id;

            y[ib].qs[j] = roundf(scaled);
        }
    }
}
223
224
// reference implementation for deterministic creation of model files
225
0
// Quantizes k floats from x into k/QK8_1 block_q8_1 blocks in y.
// k must be a multiple of QK8_1 (asserted to be 32). Per block, d = max|v| / 127,
// each element is stored as roundf(v / d), and s receives d times the sum of the
// stored codes.
void quantize_row_q8_1_ref(const float * GGML_RESTRICT x, block_q8_1 * GGML_RESTRICT y, int64_t k) {
    assert(QK8_1 == 32);
    assert(k % QK8_1 == 0);
    const int nb = k / QK8_1;

    for (int ib = 0; ib < nb; ib++) {
        const float * src = x + ib*QK8_1;

        float amax = 0.0f; // largest magnitude in the block
        for (int j = 0; j < QK8_1; j++) {
            const float v = src[j];
            amax = MAX(amax, fabsf(v));
        }

        const float d  = amax / ((1 << 7) - 1);
        const float id = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        int code_sum = 0;

        for (int j = 0; j < QK8_1/2; ++j) {
            const float s_lo = src[0       + j]*id;
            const float s_hi = src[QK8_1/2 + j]*id;

            y[ib].qs[          j] = roundf(s_lo);
            y[ib].qs[QK8_1/2 + j] = roundf(s_hi);

            // accumulate the values as stored, not the unrounded floats
            code_sum += y[ib].qs[          j];
            code_sum += y[ib].qs[QK8_1/2 + j];
        }

        y[ib].s = GGML_FP32_TO_FP16(code_sum*d);
    }
}
259
260
0
static inline int best_index_mxfp4(float x, float e) {
261
0
    int best_index = 0;
262
0
    float best_err = fabsf(kvalues_mxfp4[0]*e - x);
263
0
    for (int i = 1; i < 16; i++) {
264
0
        float err = fabsf(kvalues_mxfp4[i]*e - x);
265
0
        if (err < best_err) {
266
0
            best_index = i;
267
0
            best_err = err;
268
0
        }
269
0
    }
270
0
    return best_index;
271
0
}
272
273
0
// Quantizes k floats from x into k/QK_MXFP4 block_mxfp4 blocks in y.
// k must be a multiple of QK_MXFP4. Per block, a shared 8-bit exponent e is derived
// from the largest magnitude as floor(log2(amax)) - 2 + 127 (0 when the block is all
// zero), and each element is replaced by the index of the nearest kvalues_mxfp4 entry
// scaled by GGML_E8M0_TO_FP32_HALF(e). Two 4-bit indices are packed per byte (first
// half of the block in the low nibble, second half in the high nibble).
void quantize_row_mxfp4_ref(const float * GGML_RESTRICT x, block_mxfp4 * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK_MXFP4;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int i = 0; i < nb; i++) {
        float amax = 0.0f; // absolute max

        for (int j = 0; j < qk; j++) {
            const float v = x[i*qk + j];

            if (amax < fabsf(v)) {
                amax = fabsf(v);
            }
        }

        uint8_t e = 0;
        if (amax > 0.0f) {
            float ef = floorf(log2f(amax)) - 2 + 127;
            // Converting a float outside [0, 255] to uint8_t is undefined behavior.
            // That happens for very small amax (negative result) and for infinite
            // amax, so clamp first; in-range values are unaffected.
            if (ef < 0.0f) {
                ef = 0.0f;
            }
            if (ef > 255.0f) {
                ef = 255.0f;
            }
            e = (uint8_t) ef;
        }

        const float d = GGML_E8M0_TO_FP32_HALF(e);

        y[i].e = e;

        for (int j = 0; j < qk/2; ++j) {
            const uint8_t x0 = best_index_mxfp4(x[i*qk + 0    + j], d);
            const uint8_t x1 = best_index_mxfp4(x[i*qk + qk/2 + j], d);

            y[i].qs[j]  = x0;
            y[i].qs[j] |= x1 << 4;
        }
    }
}
306
307
0
// Expands k/QK4_0 block_q4_0 blocks from x into k floats in y.
// k must be a multiple of QK4_0. Each 4-bit code c decodes to (c - 8)*d; low nibbles
// fill the first half of a block's output, high nibbles the second half.
void dequantize_row_q4_0(const block_q4_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_FP16_TO_FP32(x[ib].d);
        float * dst = y + ib*qk;

        for (int j = 0; j < qk/2; ++j) {
            const int c_lo = (x[ib].qs[j] & 0x0F) - 8;
            const int c_hi = (x[ib].qs[j] >>   4) - 8;

            dst[j + 0   ] = c_lo*d;
            dst[j + qk/2] = c_hi*d;
        }
    }
}
326
327
0
// Expands k/QK4_1 block_q4_1 blocks from x into k floats in y.
// k must be a multiple of QK4_1. Each 4-bit code c decodes to c*d + m; low nibbles
// fill the first half of a block's output, high nibbles the second half.
void dequantize_row_q4_1(const block_q4_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK4_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_FP16_TO_FP32(x[ib].d);
        const float m = GGML_FP16_TO_FP32(x[ib].m);
        float * dst = y + ib*qk;

        for (int j = 0; j < qk/2; ++j) {
            const int c_lo = (x[ib].qs[j] & 0x0F);
            const int c_hi = (x[ib].qs[j] >>   4);

            dst[j + 0   ] = c_lo*d + m;
            dst[j + qk/2] = c_hi*d + m;
        }
    }
}
347
348
0
// Expands k/QK5_0 block_q5_0 blocks from x into k floats in y.
// k must be a multiple of QK5_0. Each 5-bit code c (4 low bits from qs, 5th bit from
// the qh mask) decodes to (c - 16)*d.
void dequantize_row_q5_0(const block_q5_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_FP16_TO_FP32(x[ib].d);
        float * dst = y + ib*qk;

        uint32_t high_bits;
        memcpy(&high_bits, x[ib].qh, sizeof(high_bits));

        for (int j = 0; j < qk/2; ++j) {
            // move bit j (first half) or bit j + 16 (second half) of the mask into bit 4
            const uint8_t b_lo = ((high_bits >> (j +  0)) << 4) & 0x10;
            const uint8_t b_hi = ((high_bits >> (j + 12))     ) & 0x10;

            const int32_t c_lo = ((x[ib].qs[j] & 0x0F) | b_lo) - 16;
            const int32_t c_hi = ((x[ib].qs[j] >>   4) | b_hi) - 16;

            dst[j + 0   ] = c_lo*d;
            dst[j + qk/2] = c_hi*d;
        }
    }
}
373
374
0
// Expands k/QK5_1 block_q5_1 blocks from x into k floats in y.
// k must be a multiple of QK5_1. Each 5-bit code c (4 low bits from qs, 5th bit from
// the qh mask) decodes to c*d + m.
void dequantize_row_q5_1(const block_q5_1 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK5_1;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_FP16_TO_FP32(x[ib].d);
        const float m = GGML_FP16_TO_FP32(x[ib].m);
        float * dst = y + ib*qk;

        uint32_t high_bits;
        memcpy(&high_bits, x[ib].qh, sizeof(high_bits));

        for (int j = 0; j < qk/2; ++j) {
            // move bit j (first half) or bit j + 16 (second half) of the mask into bit 4
            const uint8_t b_lo = ((high_bits >> (j +  0)) << 4) & 0x10;
            const uint8_t b_hi = ((high_bits >> (j + 12))     ) & 0x10;

            const int c_lo = (x[ib].qs[j] & 0x0F) | b_lo;
            const int c_hi = (x[ib].qs[j] >>   4) | b_hi;

            dst[j + 0   ] = c_lo*d + m;
            dst[j + qk/2] = c_hi*d + m;
        }
    }
}
400
401
0
// Expands k/QK8_0 block_q8_0 blocks from x into k floats in y.
// k must be a multiple of QK8_0. Each stored code decodes to code*d.
void dequantize_row_q8_0(const block_q8_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK8_0;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_FP16_TO_FP32(x[ib].d);
        float * dst = y + ib*qk;

        for (int j = 0; j < qk; ++j) {
            dst[j] = x[ib].qs[j]*d;
        }
    }
}
416
417
0
// Expands k/QK_MXFP4 block_mxfp4 blocks from x into k floats in y.
// k must be a multiple of QK_MXFP4. Each 4-bit index selects an entry of kvalues_mxfp4,
// which is multiplied by the block scale GGML_E8M0_TO_FP32_HALF(e); low nibbles fill
// the first half of a block's output, high nibbles the second half.
void dequantize_row_mxfp4(const block_mxfp4 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    static const int qk = QK_MXFP4;

    assert(k % qk == 0);

    const int nb = k / qk;

    for (int ib = 0; ib < nb; ib++) {
        const float d = GGML_E8M0_TO_FP32_HALF(x[ib].e);
        float * dst = y + ib*qk;

        for (int j = 0; j < qk/2; ++j) {
            const int8_t v_lo = kvalues_mxfp4[x[ib].qs[j] & 0x0F];
            const int8_t v_hi = kvalues_mxfp4[x[ib].qs[j] >>   4];

            dst[j + 0   ] = v_lo*d;
            dst[j + qk/2] = v_hi*d;
        }
    }
}
436
437
//
438
// 2-6 bit quantization in super-blocks
439
//
440
441
//
442
// ===================== Helper functions
443
//
444
0
// Rounds fval to the nearest integer without calling a rounding function.
// Adding 12582912.f (1.5 * 2^23) pushes the value into a range where a float has no
// fractional bits, so the addition itself performs the rounding; the integer is then
// read back from the low 23 bits of the float's representation.
// Only valid for |fval| <= 4194303 (asserted).
static inline int nearest_int(float fval) {
    assert(fabsf(fval) <= 4194303.f);

    const float shifted = fval + 12582912.f;

    int bits;
    memcpy(&bits, &shifted, sizeof(int));

    const int mantissa = bits & 0x007fffff;
    return mantissa - 0x00400000;
}
450
451
// Quantizes n values of x to integer levels in [-nmax, nmax-1], stored in L with an
// offset of +nmax, and returns the scale to multiply the (un-offset) levels by.
//   rmse_type == 0 : plain rounding with scale = max/-nmax, where max is the signed
//                    value of largest magnitude.
//   rmse_type != 0 : weighted least-squares scale; weights come from qw when non-NULL,
//                    otherwise |rmse_type| selects x^2 (1), 1 (2), |x| (3) or sqrt|x| (other).
//   rmse_type <  0 : as above, but returns right after the first fit, averaging the
//                    fitted scale with the initial one.
//   rmse_type >  0 : additionally tries 18 nearby inverse scales and keeps the best.
// Returns 0 with all L[i] = 0 when the largest magnitude is below GROUP_MAX_EPS.
static float make_qx_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, int rmse_type,
        const float * GGML_RESTRICT qw) {
    float max  = 0;
    float amax = 0;
    for (int i = 0; i < n; ++i) {
        const float ax = fabsf(x[i]);
        if (ax > amax) {
            amax = ax;
            max  = x[i];
        }
    }
    if (amax < GROUP_MAX_EPS) { // all zero
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        return 0.f;
    }

    float iscale = -nmax / max;

    if (rmse_type == 0) {
        for (int i = 0; i < n; ++i) {
            const int q = nearest_int(iscale * x[i]);
            L[i] = nmax + MAX(-nmax, MIN(nmax-1, q));
        }
        return 1/iscale;
    }

    const bool return_early = rmse_type < 0;
    if (return_early) {
        rmse_type = -rmse_type;
    }

    float sumlx = 0;
    float suml2 = 0;
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 0; i < n; ++i) {
#else
    for (int i = 0; i < n; ++i) {
#endif
        int q = nearest_int(iscale * x[i]);
        q = MAX(-nmax, MIN(nmax-1, q));
        L[i] = q + nmax;

        float w;
        if (qw) {
            w = qw[i];
        } else if (rmse_type == 1) {
            w = x[i] * x[i];
        } else if (rmse_type == 2) {
            w = 1;
        } else if (rmse_type == 3) {
            w = fabsf(x[i]);
        } else {
            w = sqrtf(fabsf(x[i]));
        }

        sumlx += w*x[i]*q;
        suml2 += w*q*q;
    }

    float scale = suml2 ? sumlx/suml2 : 0.0f;
    if (return_early) {
        return suml2 > 0 ? 0.5f*(scale + 1/iscale) : 1/iscale;
    }

    float best = scale * sumlx;
    for (int is = -9; is <= 9; ++is) {
        if (is == 0) {
            continue;
        }
        iscale = -(nmax + 0.1f*is) / max;
        sumlx = suml2 = 0;
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(iscale * x[i]);
            q = MAX(-nmax, MIN(nmax-1, q));

            float w;
            if (qw) {
                w = qw[i];
            } else if (rmse_type == 1) {
                w = x[i] * x[i];
            } else if (rmse_type == 2) {
                w = 1;
            } else if (rmse_type == 3) {
                w = fabsf(x[i]);
            } else {
                w = sqrtf(fabsf(x[i]));
            }

            sumlx += w*x[i]*q;
            suml2 += w*q*q;
        }
        // the candidate wins when sumlx^2/suml2 exceeds the best value so far
        if (suml2 > 0 && sumlx*sumlx > best*suml2) {
            for (int i = 0; i < n; ++i) {
                const int q = nearest_int(iscale * x[i]);
                L[i] = nmax + MAX(-nmax, MIN(nmax-1, q));
            }
            scale = sumlx/suml2;
            best  = scale*sumlx;
        }
    }
    return scale;
}
519
520
0
// Quantizes n values of x to integer levels in [-nmax, nmax-1], stored in L with an
// offset of +nmax, and returns the scale for the (un-offset) levels.
// Without do_rmse the levels are plain roundings of x*iscale and the scale is 1/iscale,
// where iscale = -nmax/max and max is the signed value of largest magnitude.
// With do_rmse the levels are refined for up to 5 passes, changing one level at a time
// whenever that increases sumlx^2/suml2 (weights x^2), and the least-squares scale is returned.
// Returns 0 with all L[i] = 0 when the largest magnitude is below GROUP_MAX_EPS.
static float make_q3_quants(int n, int nmax, const float * GGML_RESTRICT x, int8_t * GGML_RESTRICT L, bool do_rmse) {
    float max  = 0;
    float amax = 0;
    for (int i = 0; i < n; ++i) {
        const float ax = fabsf(x[i]);
        if (ax > amax) {
            amax = ax;
            max  = x[i];
        }
    }
    if (amax < GROUP_MAX_EPS) { // all zero
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        return 0.f;
    }

    const float iscale = -nmax / max;

    if (!do_rmse) {
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(iscale * x[i]);
            q = MAX(-nmax, MIN(nmax-1, q));
            L[i] = q + nmax;
        }
        return 1/iscale;
    }

    // initial assignment; L holds un-offset levels until the end of this path
    float sumlx = 0;
    float suml2 = 0;
    for (int i = 0; i < n; ++i) {
        int q = nearest_int(iscale * x[i]);
        q = MAX(-nmax, MIN(nmax-1, q));
        L[i] = q;
        const float w = x[i]*x[i];
        sumlx += w*x[i]*q;
        suml2 += w*q*q;
    }

    for (int itry = 0; itry < 5; ++itry) {
        int n_changed = 0;
        for (int i = 0; i < n; ++i) {
            const float w = x[i]*x[i];
            // sums with element i taken out
            float slx = sumlx - w*x[i]*L[i];
            if (!(slx > 0)) {
                continue;
            }
            float sl2 = suml2 - w*L[i]*L[i];
            int new_l = nearest_int(x[i] * sl2 / slx);
            new_l = MAX(-nmax, MIN(nmax-1, new_l));
            if (new_l == L[i]) {
                continue;
            }
            slx += w*x[i]*new_l;
            sl2 += w*new_l*new_l;
            if (sl2 > 0 && slx*slx*suml2 > sumlx*sumlx*sl2) {
                L[i]  = new_l;
                sumlx = slx;
                suml2 = sl2;
                ++n_changed;
            }
        }
        if (!n_changed) {
            break;
        }
    }

    for (int i = 0; i < n; ++i) {
        L[i] += nmax;
    }
    return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
}
578
579
// Quantizes n values of x to levels in [0, nmax] around an offset, x[i] ~ scale*L[i] + min
// with min <= 0. Returns scale and writes -min to *the_min.
// Runs up to ntry passes; each pass re-rounds the levels, refits scale as
// sum((x - min)*l)/sum(l*l), and blends min with the mean residual using alpha.
// The loop stops early once a pass leaves every level unchanged.
// When all inputs are equal, L is zeroed, *the_min is set to 0 and 0 is returned.
// NOTE(review): L[i] is compared against the new level before this function has
// written it, so the first pass depends on what the caller left in L - confirm callers
// initialise it.
static float make_qkx1_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min,
        int ntry, float alpha) {
    float min = x[0];
    float max = x[0];
    for (int i = 1; i < n; ++i) {
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
    }
    if (max == min) {
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        *the_min = 0;
        return 0.f;
    }
    if (min > 0) {
        min = 0;
    }

    float iscale = nmax/(max - min);
    float scale  = 1/iscale;

    for (int itry = 0; itry < ntry; ++itry) {
        float sumlx = 0;
        int   suml2 = 0;
        bool  changed = false;
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(iscale*(x[i] - min));
            q = MAX(0, MIN(nmax, q));
            if (q != L[i]) {
                L[i] = q;
                changed = true;
            }
            sumlx += (x[i] - min)*q;
            suml2 += q*q;
        }
        scale = sumlx/suml2;

        float resid = 0;
        for (int i = 0; i < n; ++i) {
            resid += x[i] - scale*L[i];
        }
        min = alpha*min + (1 - alpha)*resid/n;
        if (min > 0) {
            min = 0;
        }
        iscale = 1/scale;

        if (!changed) {
            break;
        }
    }

    *the_min = -min;
    return scale;
}
621
622
// Weighted quantization of n values to levels in [0, nmax] with an offset:
// x[i] ~ scale*L[i] + min, min <= 0. Returns scale and writes -min to *the_min.
// After an initial rounding with scale = (max - min)/nmax, nstep+1 candidate inverse
// scales (rmin + rdelta*is + nmax)/(max - min) are tried. For each, scale and min are
// refit by weighted least squares and the candidate is kept if it lowers the weighted
// error (absolute error if use_mad, squared error otherwise).
// Laux is scratch space for n candidate levels. With nstep < 1 only the initial
// rounding is performed. When max equals the clamped min, L is zeroed and 0 is returned.
static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
        uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
        float rmin, float rdelta, int nstep, bool use_mad) {
    float min   = x[0];
    float max   = x[0];
    float sum_w = weights[0];
    float sum_x = sum_w * x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 1; i < n; ++i) {
#else
    for (int i = 1; i < n; ++i) {
#endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
        const float w = weights[i];
        sum_w += w;
        sum_x += w * x[i];
    }
    if (min > 0) {
        min = 0;
    }
    if (max == min) {
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        *the_min = -min;
        return 0.f;
    }

    float iscale = nmax/(max - min);
    float scale  = 1/iscale;

    // error of the plain rounding, used as the baseline to beat
    float best_error = 0;
    for (int i = 0; i < n; ++i) {
        const int q = nearest_int(iscale*(x[i] - min));
        L[i] = MAX(0, MIN(nmax, q));
        float diff = scale * L[i] + min - x[i];
        diff = use_mad ? fabsf(diff) : diff * diff;
        const float w = weights[i];
        best_error += w * diff;
    }
    if (nstep < 1) {
        *the_min = -min;
        return scale;
    }

    for (int is = 0; is <= nstep; ++is) {
        // min may have been replaced by an earlier accepted candidate
        iscale = (rmin + rdelta*is + nmax)/(max - min);

        float sum_l  = 0;
        float sum_l2 = 0;
        float sum_xl = 0;
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(iscale*(x[i] - min));
            q = MAX(0, MIN(nmax, q));
            Laux[i] = q;
            const float w = weights[i];
            sum_l  += w*q;
            sum_l2 += w*q*q;
            sum_xl += w*q*x[i];
        }

        const float D = sum_w * sum_l2 - sum_l * sum_l;
        if (!(D > 0)) {
            continue;
        }

        float this_scale = (sum_w * sum_xl - sum_x * sum_l)/D;
        float this_min   = (sum_l2 * sum_x - sum_l * sum_xl)/D;
        if (this_min > 0) {
            // a positive offset is not allowed: refit the scale with the offset pinned to 0
            this_min   = 0;
            this_scale = sum_xl / sum_l2;
        }

        float cur_error = 0;
        for (int i = 0; i < n; ++i) {
            float diff = this_scale * Laux[i] + this_min - x[i];
            diff = use_mad ? fabsf(diff) : diff * diff;
            const float w = weights[i];
            cur_error += w * diff;
        }
        if (cur_error < best_error) {
            for (int i = 0; i < n; ++i) {
                L[i] = Laux[i];
            }
            best_error = cur_error;
            scale      = this_scale;
            min        = this_min;
        }
    }

    *the_min = -min;
    return scale;
}
702
703
0
// Unpacks the j-th 6-bit scale (*d) and 6-bit min (*m) from the packed byte array q.
// For j < 4 both values are the low 6 bits of q[j] and q[j+4]. For j >= 4 the low
// 4 bits come from the two nibbles of q[j+4] and the upper 2 bits from the top 2 bits
// of q[j-4] (scale) and q[j] (min).
static inline void get_scale_min_k4(int j, const uint8_t * GGML_RESTRICT q, uint8_t * GGML_RESTRICT d, uint8_t * GGML_RESTRICT m) {
    if (j < 4) {
        *d = q[j] & 63;
        *m = q[j + 4] & 63;
        return;
    }

    *d = (q[j+4] & 0xF) | ((q[j-4] >> 6) << 4);
    *m = (q[j+4] >>  4) | ((q[j-0] >> 6) << 4);
}
711
712
//========================- 2-bit (de)-quantization
713
714
0
// Quantizes k floats from x into k/QK_K block_q2_K super-blocks in y.
// k must be a multiple of QK_K. Each super-block is split into groups of 16 values;
// make_qkx2_quants yields a scale and a min per group, which are themselves quantized
// to 4 bits each (scale in the low nibble, min in the high nibble of scales[j])
// relative to the super-block values d and dmin. The 2-bit levels are then recomputed
// against the quantized scale/min and packed four per byte.
void quantize_row_q2_K_ref(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    uint8_t L[QK_K];
    uint8_t Laux[16];
    float   weights[16];
    float   mins[QK_K/16];
    float   scales[QK_K/16];

    const float q4scale = 15.f;

    for (int ib = 0; ib < nb; ib++) {
        float max_scale = 0; // as we are deducting the min, scales are always positive
        float max_min   = 0;
        for (int j = 0; j < QK_K/16; ++j) {
            for (int l = 0; l < 16; ++l) {
                weights[l] = fabsf(x[16*j + l]);
            }
            scales[j] = make_qkx2_quants(16, 3, x + 16*j, weights, L + 16*j, &mins[j], Laux, -0.5f, 0.1f, 15, true);
            const float group_scale = scales[j];
            if (group_scale > max_scale) {
                max_scale = group_scale;
            }
            const float group_min = mins[j];
            if (group_min > max_min) {
                max_min = group_min;
            }
        }

        // 4-bit group scales go into the low nibbles
        if (max_scale > 0) {
            const float iscale = q4scale/max_scale;
            for (int j = 0; j < QK_K/16; ++j) {
                const int q = nearest_int(iscale*scales[j]);
                y[ib].scales[j] = q;
            }
            y[ib].d = GGML_FP32_TO_FP16(max_scale/q4scale);
        } else {
            for (int j = 0; j < QK_K/16; ++j) {
                y[ib].scales[j] = 0;
            }
            y[ib].d = GGML_FP32_TO_FP16(0.f);
        }

        // 4-bit group mins go into the high nibbles
        if (max_min > 0) {
            const float iscale = q4scale/max_min;
            for (int j = 0; j < QK_K/16; ++j) {
                const int q = nearest_int(iscale*mins[j]);
                y[ib].scales[j] |= (q << 4);
            }
            y[ib].dmin = GGML_FP32_TO_FP16(max_min/q4scale);
        } else {
            y[ib].dmin = GGML_FP32_TO_FP16(0.f);
        }

        // recompute the levels against the scale/min values as they will be decoded
        for (int j = 0; j < QK_K/16; ++j) {
            const float d = GGML_FP16_TO_FP32(y[ib].d) * (y[ib].scales[j] & 0xF);
            if (!d) {
                continue;
            }
            const float dm = GGML_FP16_TO_FP32(y[ib].dmin) * (y[ib].scales[j] >> 4);
            for (int ii = 0; ii < 16; ++ii) {
                int q = nearest_int((x[16*j + ii] + dm)/d);
                q = MAX(0, MIN(3, q));
                L[16*j + ii] = q;
            }
        }

        // pack four 2-bit levels per byte, taken 32 elements apart within each run of 128
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[ib].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
783
784
0
// Expands k/QK_K block_q2_K super-blocks from x into k floats in y.
// k must be a multiple of QK_K. Every group of 16 outputs uses one scales[] byte:
// its low nibble times d is the group scale and its high nibble times dmin is the
// group min, so each 2-bit level l decodes to scale*l - min.
void dequantize_row_q2_K(const block_q2_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    for (int ib = 0; ib < nb; ib++) {

        const float d   = GGML_FP16_TO_FP32(x[ib].d);
        const float min = GGML_FP16_TO_FP32(x[ib].dmin);

        const uint8_t * q = x[ib].qs;

        int is = 0;
        float dl, ml;
        for (int n = 0; n < QK_K; n += 128) {
            // 32 bytes of q carry 128 levels: the bit pair at `shift` of each byte
            for (int shift = 0; shift < 8; shift += 2) {

                uint8_t sc = x[ib].scales[is++];
                dl = d * (sc & 0xF);
                ml = min * (sc >> 4);
                for (int l = 0; l < 16; ++l) {
                    *y++ = dl * ((int8_t)((q[l] >> shift) & 3)) - ml;
                }

                sc = x[ib].scales[is++];
                dl = d * (sc & 0xF);
                ml = min * (sc >> 4);
                for (int l = 0; l < 16; ++l) {
                    *y++ = dl * ((int8_t)((q[l+16] >> shift) & 3)) - ml;
                }
            }
            q += 32;
        }
    }
}
815
816
// Weighted quantization of n values to levels in [0, nmax] with an offset:
// x[i] ~ scale*L[i] + min, min <= 0. Returns scale and writes -min to *the_min.
// Weights come from `weights` when non-NULL, otherwise x[i]^2 is used.
// After an initial rounding with scale = (max - min)/nmax, nstep+1 candidate inverse
// scales (rmin + rdelta*is + nmax)/(max - min) are tried. For each, scale and min are
// refit by weighted least squares and the candidate is kept if it lowers the weighted
// error (absolute error if use_mad, squared error otherwise).
// Laux is scratch space for n candidate levels. With nstep < 1 only the initial
// rounding is performed. When max <= the clamped min, L is zeroed and 0 is returned.
static float make_qkx3_quants(int n, int nmax, const float * GGML_RESTRICT x, const float * GGML_RESTRICT weights,
        uint8_t * GGML_RESTRICT L, float * GGML_RESTRICT the_min, uint8_t * GGML_RESTRICT Laux,
        float rmin, float rdelta, int nstep, bool use_mad) {
    float min   = x[0];
    float max   = x[0];
    float sum_w = weights ? weights[0] : x[0]*x[0];
    float sum_x = sum_w * x[0];
#ifdef HAVE_BUGGY_APPLE_LINKER
    // use 'volatile' to prevent unroll and work around a bug in Apple ld64 1015.7
    for (volatile int i = 1; i < n; ++i) {
#else
    for (int i = 1; i < n; ++i) {
#endif
        if (x[i] < min) min = x[i];
        if (x[i] > max) max = x[i];
        const float w = weights ? weights[i] : x[i]*x[i];
        sum_w += w;
        sum_x += w * x[i];
    }
    if (min > 0) {
        min = 0;
    }
    if (max <= min) {
        memset(L, 0, n);
        *the_min = -min;
        return 0.f;
    }

    float iscale = nmax/(max - min);
    float scale  = 1/iscale;

    // error of the plain rounding, used as the baseline to beat
    float best_mad = 0;
    for (int i = 0; i < n; ++i) {
        const int q = nearest_int(iscale*(x[i] - min));
        L[i] = MAX(0, MIN(nmax, q));
        float diff = scale * L[i] + min - x[i];
        diff = use_mad ? fabsf(diff) : diff*diff;
        const float w = weights ? weights[i] : x[i]*x[i];
        best_mad += w * diff;
    }
    if (nstep < 1) {
        *the_min = -min;
        return scale;
    }

    for (int is = 0; is <= nstep; ++is) {
        // min may have been replaced by an earlier accepted candidate
        iscale = (rmin + rdelta*is + nmax)/(max - min);

        float sum_l  = 0;
        float sum_l2 = 0;
        float sum_xl = 0;
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(iscale*(x[i] - min));
            q = MAX(0, MIN(nmax, q));
            Laux[i] = q;
            const float w = weights ? weights[i] : x[i]*x[i];
            sum_l  += w*q;
            sum_l2 += w*q*q;
            sum_xl += w*q*x[i];
        }

        const float D = sum_w * sum_l2 - sum_l * sum_l;
        if (!(D > 0)) {
            continue;
        }

        float this_scale = (sum_w * sum_xl - sum_x * sum_l)/D;
        float this_min   = (sum_l2 * sum_x - sum_l * sum_xl)/D;
        if (this_min > 0) {
            // a positive offset is not allowed: refit the scale with the offset pinned to 0
            this_min   = 0;
            this_scale = sum_xl / sum_l2;
        }

        float mad = 0;
        for (int i = 0; i < n; ++i) {
            float diff = this_scale * Laux[i] + this_min - x[i];
            diff = use_mad ? fabsf(diff) : diff*diff;
            const float w = weights ? weights[i] : x[i]*x[i];
            mad += w * diff;
        }
        if (mad < best_mad) {
            for (int i = 0; i < n; ++i) {
                L[i] = Laux[i];
            }
            best_mad = mad;
            scale    = this_scale;
            min      = this_min;
        }
    }

    *the_min = -min;
    return scale;
}
898
899
0
// Quantizes n values x[] to integer levels L[] (stored as uint8_t, capped above at nmax)
// sharing one scale, using quant_weights[] as per-element weights of the squared error.
//
// Steps:
//   1. take the largest x[i] (floor 0); if it is below GROUP_MAX_EPS, zero L[] and return 0
//   2. try inverse scales (nmax + 0.1*s)/max for s = -4..4 and keep the one with the
//      smallest weighted squared error
//   3. up to 5 refinement passes that re-pick one level at a time
//
// Returns sum(w*x*L)/sum(w*L*L), or 0 when the denominator is not positive.
// NOTE(review): levels are only capped from above; callers presumably pass x[i] >= 0 - confirm.
static float make_qp_quants(int n, int nmax, const float * GGML_RESTRICT x, uint8_t * GGML_RESTRICT L, const float * quant_weights) {
    float max = 0;
    for (int i = 0; i < n; ++i) {
        max = MAX(max, x[i]);
    }
    if (max < GROUP_MAX_EPS) {
        // nothing above the threshold: all levels are zero and so is the scale
        for (int i = 0; i < n; ++i) {
            L[i] = 0;
        }
        return 0.f;
    }

    // Baseline candidate: the largest value maps exactly onto nmax.
    float best_iscale = nmax / max;
    float best_err = 0;
    {
        const float base_scale = 1/best_iscale;
        for (int i = 0; i < n; ++i) {
            L[i] = nearest_int(best_iscale * x[i]);
            const float err = x[i] - base_scale*L[i];
            const float w = quant_weights[i];
            best_err += w*err*err;
        }
    }

    // Scan the neighbouring inverse scales; step 0 is the baseline evaluated above.
    for (int step = -4; step <= 4; ++step) {
        if (step == 0) {
            continue;
        }
        const float cand_iscale = (0.1f*step + nmax)/max;
        const float cand_scale  = 1/cand_iscale;
        float cand_err = 0;
        for (int i = 0; i < n; ++i) {
            int q = nearest_int(cand_iscale*x[i]);
            if (q > nmax) {
                q = nmax;
            }
            const float err = x[i] - cand_scale*q;
            const float w = quant_weights[i];
            cand_err += w*err*err;
        }
        if (cand_err < best_err) {
            best_err    = cand_err;
            best_iscale = cand_iscale;
        }
    }

    // Levels for the winning inverse scale plus the two weighted sums the result is built from.
    float sumlx = 0;
    float suml2 = 0;
    for (int i = 0; i < n; ++i) {
        int q = nearest_int(best_iscale * x[i]);
        if (q > nmax) {
            q = nmax;
        }
        L[i] = q;
        const float w = quant_weights[i];
        sumlx += w*x[i]*q;
        suml2 += w*q*q;
    }

    // Refinement: for each element, remove its contribution from the sums, pick the level
    // implied by the remaining sums, and accept it when slx^2/sl2 grows (compared in
    // cross-multiplied form). Stop early once a full pass changes nothing.
    for (int pass = 0; pass < 5; ++pass) {
        int changed = 0;
        for (int i = 0; i < n; ++i) {
            const float w = quant_weights[i];
            float slx = sumlx - w*x[i]*L[i];
            float sl2 = suml2 - w*L[i]*L[i];
            if (!(slx > 0 && sl2 > 0)) {
                continue;
            }
            int cand = nearest_int(x[i] * sl2 / slx);
            if (cand > nmax) {
                cand = nmax;
            }
            if (cand == L[i]) {
                continue;
            }
            slx += w*x[i]*cand;
            sl2 += w*cand*cand;
            if (slx*slx*suml2 > sumlx*sumlx*sl2) {
                L[i]  = cand;
                sumlx = slx;
                suml2 = sl2;
                ++changed;
            }
        }
        if (changed == 0) {
            break;
        }
    }

    if (suml2 > 0.0f) {
        return sumlx / suml2;
    }
    return 0.0f;
}
971
972
0
// Importance-weighted Q2_K quantization of one row.
//
//   x             k input floats (k must be a multiple of QK_K)
//   y             output, k/QK_K blocks
//   quant_weights required (asserted); read at the same position within the row as x
//
// Per block of QK_K values:
//   - each group of 16 values gets weights qw * sqrt(sigma2 + x^2), where sigma2 is the
//     mean of x^2 over the block, and is quantized to 2-bit levels with a (scale, min) pair
//     by make_qkx3_quants
//   - the 16 scales and 16 mins are themselves quantized to 4 bits by make_qp_quants,
//     weighted by the per-group weight sums sw[]
//   - the 2-bit levels are recomputed against the scales/mins as actually stored
static void quantize_row_q2_K_impl(const float * GGML_RESTRICT x, block_q2_K * GGML_RESTRICT y, int k, const float * GGML_RESTRICT quant_weights) {
    GGML_ASSERT(quant_weights);
    assert(k % QK_K == 0);
    const int nb = k / QK_K;
    const bool requantize = true;

    uint8_t L[QK_K];
    uint8_t Laux[16];
    float mins[QK_K/16];
    float scales[QK_K/16];
    float sw[QK_K/16];
    float weight[16];
    uint8_t Ls[QK_K/16], Lm[QK_K/16];

    for (int i = 0; i < nb; i++) {
        memset(sw, 0, sizeof(sw));
        float sumx2 = 0;
        for (int j = 0; j < QK_K; ++j) sumx2 += x[j]*x[j];
        float sigma2 = sumx2/QK_K;
        for (int j = 0; j < QK_K/16; ++j) {
            const float * GGML_RESTRICT qw = quant_weights + QK_K * i + 16*j;
            for (int l = 0; l < 16; ++l) weight[l] = qw[l] * sqrtf(sigma2 + x[16*j + l]*x[16*j + l]);
            // weight[] holds exactly 16 entries (one group), so sum over 16 -- not over the
            // number of groups QK_K/16, which only happens to be 16 as well when QK_K == 256
            for (int l = 0; l < 16; ++l) sw[j] += weight[l];
            scales[j] = make_qkx3_quants(16, 3, x + 16*j, weight, L + 16*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
        }

        // 4-bit quantization of the group scales and mins; dm/mm are the block-level factors
        float dm, mm;
        dm  = make_qp_quants(QK_K/16, 15, scales, Ls, sw);
        mm  = make_qp_quants(QK_K/16, 15, mins,   Lm, sw);

        // round-trip through fp16 so the re-quantization below sees the stored values
        y[i].d    = GGML_FP32_TO_FP16(dm);
        y[i].dmin = GGML_FP32_TO_FP16(mm);
        dm        = GGML_FP16_TO_FP32(y[i].d);
        mm        = GGML_FP16_TO_FP32(y[i].dmin);

        // low nibble: scale level, high nibble: min level
        for (int j = 0; j < QK_K/16; ++j) {
            y[i].scales[j] = Ls[j] | (Lm[j] << 4);
        }

        if (requantize) {
            for (int j = 0; j < QK_K/16; ++j) {
                const float d = dm * (y[i].scales[j] & 0xF);
                if (!d) continue; // zero scale: keep the levels from make_qkx3_quants
                const float m = mm * (y[i].scales[j] >> 4);
                for (int ii = 0; ii < 16; ++ii) {
                    int l = nearest_int((x[16*j + ii] + m)/d);
                    l = MAX(0, MIN(3, l));
                    L[16*j + ii] = l;
                }
            }
        }

        // four 2-bit levels per byte, taken 32 positions apart within each run of 128
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                y[i].qs[j/4 + l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
            }
        }

        x += QK_K;
    }
}
1033
1034
0
// Quantizes nrow rows of n_per_row floats from src into Q2_K blocks at dst.
// Without quant_weights the reference quantizer processes all rows in a single call;
// with quant_weights each row goes through the weighted quantizer, and the same
// quant_weights pointer is passed for every row.
// Returns the number of bytes covered in dst (nrow * row size).
size_t quantize_q2_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const size_t row_size = ggml_row_size(GGML_TYPE_Q2_K, n_per_row);

    if (quant_weights == NULL) {
        quantize_row_q2_K_ref(src, dst, (int64_t)nrow*n_per_row);
        return nrow * row_size;
    }

    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r) {
        quantize_row_q2_K_impl(src, (block_q2_K*)out, n_per_row, quant_weights);
        src += n_per_row;
        out += row_size;
    }
    return nrow * row_size;
}
1049
1050
//========================= 3-bit (de)-quantization
1051
1052
0
// Reference Q3_K quantization of k floats (k must be a multiple of QK_K) into k/QK_K blocks.
//
// Per block:
//   - every group of 16 values is quantized by make_q3_quants, giving one float scale per group
//   - the group scales are quantized to 6 bits against the scale of largest magnitude and
//     packed into 12 bytes: low nibbles in bytes 0..7, the upper 2 bits in bytes 8..11
//   - the values are re-quantized against the scales as stored
//   - each 3-bit level is split into a high bit (hmask) and two low bits (qs)
void quantize_row_q3_K_ref(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    int8_t L[QK_K];
    float scales[QK_K / 16];

    for (int ib = 0; ib < nb; ++ib, x += QK_K) {
        block_q3_K * blk = &y[ib];

        // group scales; remember the one with the largest magnitude, sign included
        float max_scale = 0;
        float amax = 0;
        for (int j = 0; j < QK_K/16; ++j) {
            scales[j] = make_q3_quants(16, 4, x + 16*j, L + 16*j, true);
            const float mag = fabsf(scales[j]);
            if (mag > amax) {
                amax = mag;
                max_scale = scales[j];
            }
        }

        memset(blk->scales, 0, 12);
        if (max_scale) {
            const float iscale = -32.f/max_scale;
            for (int j = 0; j < QK_K/16; ++j) {
                int8_t l = nearest_int(iscale*scales[j]);
                l = MAX(-32, MIN(31, l)) + 32; // now in 0..63
                if (j >= 8) {
                    blk->scales[j-8] |= ((l & 0xF) << 4);
                } else {
                    blk->scales[j] = l & 0xF;
                }
                l >>= 4;
                blk->scales[j%4 + 8] |= (l << (2*(j/4)));
            }
            blk->d = GGML_FP32_TO_FP16(1/iscale);
        } else {
            blk->d = GGML_FP32_TO_FP16(0.f);
        }

        // re-quantize with the scales decoded back from their packed form
        const float d_all = GGML_FP16_TO_FP32(blk->d);
        for (int j = 0; j < QK_K/16; ++j) {
            const int lo = j < 8 ? blk->scales[j] & 0xF : blk->scales[j-8] >> 4;
            const int hi = (blk->scales[8 + j%4] >> (2*(j/4))) & 3;
            const int8_t sc = (lo | (hi << 4)) - 32;
            const float d = d_all * sc;
            if (!d) {
                continue; // zero effective scale: keep the levels from make_q3_quants
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-4, MIN(3, l));
                L[16*j + ii] = l + 4;
            }
        }

        // High bits: the first QK_K/8 quants use bit 0 of hmask[0..QK_K/8-1],
        // the next QK_K/8 use bit 1, and so on.
        memset(blk->hmask, 0, QK_K/8);
        for (int j = 0; j < QK_K; ++j) {
            if (L[j] > 3) {
                blk->hmask[j % (QK_K/8)] |= (uint8_t)(1 << (j / (QK_K/8)));
                L[j] -= 4;
            }
        }

        // Low bits: four 2-bit values per byte, taken 32 positions apart within each run of 128.
        for (int base = 0; base < QK_K; base += 128) {
            uint8_t * qs = blk->qs + base/4;
            const int8_t * q = L + base;
            for (int l = 0; l < 32; ++l) {
                qs[l] = q[l] | (q[l + 32] << 2) | (q[l + 64] << 4) | (q[l + 96] << 6);
            }
        }
    }
}
1127
1128
0
// Expands k/QK_K Q3_K blocks from x into k floats at y (k must be a multiple of QK_K).
// Each output is d * (scale - 32) * (low2 - (high bit set ? 0 : 4)), where scale is the
// 6-bit group scale rebuilt from the 12 packed scale bytes.
void dequantize_row_q3_K(const block_q3_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    const uint32_t kmask1 = 0x03030303;
    const uint32_t kmask2 = 0x0f0f0f0f;

    uint32_t aux[4];
    const int8_t * sc = (const int8_t*)aux;

    float * out = y;
    for (int ib = 0; ib < nb; ++ib) {
        const block_q3_K * blk = &x[ib];
        const float d_all = GGML_FP16_TO_FP32(blk->d);

        // Unpack the 16 six-bit scales into the 16 bytes of aux: bytes 0..7 of the packed
        // form carry the low nibbles, bytes 8..11 carry the upper 2 bits.
        memcpy(aux, blk->scales, 12);
        const uint32_t lo0 = aux[0];
        const uint32_t lo1 = aux[1];
        const uint32_t hi  = aux[2];
        aux[0] = (lo0 & kmask2)        | (((hi >> 0) & kmask1) << 4);
        aux[1] = (lo1 & kmask2)        | (((hi >> 2) & kmask1) << 4);
        aux[2] = ((lo0 >> 4) & kmask2) | (((hi >> 4) & kmask1) << 4);
        aux[3] = ((lo1 >> 4) & kmask2) | (((hi >> 6) & kmask1) << 4);

        const uint8_t * q  = blk->qs;
        const uint8_t * hm = blk->hmask;
        uint8_t hbit = 1; // walks through all 8 hmask bits over the whole block
        int is = 0;
        for (int n = 0; n < QK_K; n += 128, q += 32) {
            for (int shift = 0; shift < 8; shift += 2, hbit <<= 1) {
                // two groups of 16 share the same bit position: bytes 0..15 and 16..31
                for (int half = 0; half < 32; half += 16) {
                    const float dl = d_all * (sc[is++] - 32);
                    for (int l = half; l < half + 16; ++l) {
                        *out++ = dl * ((int8_t)((q[l] >> shift) & 3) - ((hm[l] & hbit) ? 0 : 4));
                    }
                }
            }
        }
    }
}
1177
1178
0
// Weighted Q3_K quantization of one row of n_per_row floats (a multiple of QK_K).
//
// Per block:
//   - element weights are qw * sqrt(sigma2 + x^2) when quant_weights is given
//     (sigma2 = 2 * mean of x^2 over the block), otherwise x^2
//   - every group of 16 values is quantized by make_qx_quants with those weights
//   - the 16 group scales are quantized to 6 bits by make_qx_quants, weighted by the
//     per-group weight sums, and packed into 12 bytes
//   - values are re-quantized against the stored scales, then split into hmask/qs
static void quantize_row_q3_K_impl(const float * GGML_RESTRICT x, block_q3_K * GGML_RESTRICT y, int64_t n_per_row, const float * GGML_RESTRICT quant_weights) {
    assert(n_per_row % QK_K == 0);
    const int nb = n_per_row / QK_K;

    int8_t L[QK_K];
    float scales[QK_K / 16];
    float weight[16];
    float sw[QK_K / 16];
    int8_t Ls[QK_K / 16];

    for (int ib = 0; ib < nb; ++ib, x += QK_K) {
        block_q3_K * blk = &y[ib];

        float sumx2 = 0;
        for (int j = 0; j < QK_K; ++j) {
            sumx2 += x[j]*x[j];
        }
        const float sigma2 = 2*sumx2/QK_K;

        for (int j = 0; j < QK_K/16; ++j) {
            if (!quant_weights) {
                for (int l = 0; l < 16; ++l) {
                    weight[l] = x[16*j+l]*x[16*j+l];
                }
            } else {
                const float * qw = quant_weights + QK_K * ib + 16*j;
                for (int l = 0; l < 16; ++l) {
                    weight[l] = qw[l] * sqrtf(sigma2 + x[16*j+l]*x[16*j+l]);
                }
            }
            float sumw = 0;
            for (int l = 0; l < 16; ++l) {
                sumw += weight[l];
            }
            sw[j] = sumw;

            scales[j] = make_qx_quants(16, 4, x + 16*j, L + 16*j, 1, weight);
        }

        memset(blk->scales, 0, 12);

        // 6-bit scale levels: low nibbles in bytes 0..7, upper 2 bits in bytes 8..11
        const float d_block = make_qx_quants(QK_K/16, 32, scales, Ls, 1, sw);
        for (int j = 0; j < QK_K/16; ++j) {
            int l = Ls[j];
            if (j >= 8) {
                blk->scales[j-8] |= ((l & 0xF) << 4);
            } else {
                blk->scales[j] = l & 0xF;
            }
            l >>= 4;
            blk->scales[j%4 + 8] |= (l << (2*(j/4)));
        }
        blk->d = GGML_FP32_TO_FP16(d_block);

        // re-quantize with the scales decoded back from their packed form
        const float d_all = GGML_FP16_TO_FP32(blk->d);
        for (int j = 0; j < QK_K/16; ++j) {
            const int lo = j < 8 ? blk->scales[j] & 0xF : blk->scales[j-8] >> 4;
            const int hi = (blk->scales[8 + j%4] >> (2*(j/4))) & 3;
            const int8_t sc = (lo | (hi << 4)) - 32;
            const float d = d_all * sc;
            if (!d) {
                continue; // zero effective scale: keep the levels from make_qx_quants
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-4, MIN(3, l));
                L[16*j + ii] = l + 4;
            }
        }

        // High bits: the first QK_K/8 quants use bit 0 of hmask[0..QK_K/8-1],
        // the next QK_K/8 use bit 1, and so on.
        memset(blk->hmask, 0, QK_K/8);
        for (int j = 0; j < QK_K; ++j) {
            if (L[j] > 3) {
                blk->hmask[j % (QK_K/8)] |= (uint8_t)(1 << (j / (QK_K/8)));
                L[j] -= 4;
            }
        }

        // Low bits: four 2-bit values per byte, taken 32 positions apart within each run of 128.
        for (int base = 0; base < QK_K; base += 128) {
            uint8_t * qs = blk->qs + base/4;
            const int8_t * q = L + base;
            for (int l = 0; l < 32; ++l) {
                qs[l] = q[l] | (q[l + 32] << 2) | (q[l + 64] << 4) | (q[l + 96] << 6);
            }
        }
    }
}
1261
1262
0
// Quantizes nrow rows of n_per_row floats from src into Q3_K blocks at dst.
// Without quant_weights the reference quantizer processes all rows in a single call;
// with quant_weights each row goes through the weighted quantizer, and the same
// quant_weights pointer is passed for every row.
// Returns the number of bytes covered in dst (nrow * row size).
size_t quantize_q3_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const size_t row_size = ggml_row_size(GGML_TYPE_Q3_K, n_per_row);

    if (quant_weights == NULL) {
        quantize_row_q3_K_ref(src, dst, (int64_t)nrow*n_per_row);
        return nrow * row_size;
    }

    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r) {
        quantize_row_q3_K_impl(src, (block_q3_K*)out, n_per_row, quant_weights);
        src += n_per_row;
        out += row_size;
    }
    return nrow * row_size;
}
1277
1278
// ====================== 4-bit (de)-quantization
1279
1280
0
// Reference Q4_K quantization of k floats (k must be a multiple of QK_K) into k/QK_K blocks.
//
// Per block:
//   - every group of 32 values is quantized to 4-bit levels with a (scale, min) pair by
//     make_qkx2_quants, using weights av_x + |x| (av_x = RMS of the group)
//   - scales and mins are quantized to 6 bits relative to their block maxima and packed
//     into the 12 scale bytes; d and dmin store max/63 as fp16
//   - the values are re-quantized against the scales/mins as stored
//   - two 4-bit levels are packed per byte
void quantize_row_q4_K_ref(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    uint8_t L[QK_K];
    uint8_t Laux[32];
    float   weights[32];
    float mins[QK_K/32];
    float scales[QK_K/32];

    for (int ib = 0; ib < nb; ++ib, x += QK_K) {
        block_q4_K * blk = &y[ib];

        // the min is subtracted before scaling, so only positive maxima are tracked
        float max_scale = 0;
        float max_min   = 0;
        for (int j = 0; j < QK_K/32; ++j) {
            const float * xs = x + 32*j;
            float sum_x2 = 0;
            for (int l = 0; l < 32; ++l) {
                sum_x2 += xs[l] * xs[l];
            }
            const float av_x = sqrtf(sum_x2/32);
            for (int l = 0; l < 32; ++l) {
                weights[l] = av_x + fabsf(xs[l]);
            }
            scales[j] = make_qkx2_quants(32, 15, xs, weights, L + 32*j, &mins[j], Laux, -1.f, 0.1f, 20, false);
            if (scales[j] > max_scale) {
                max_scale = scales[j];
            }
            if (mins[j] > max_min) {
                max_min = mins[j];
            }
        }

        float inv_scale = 0.f;
        if (max_scale > 0) {
            inv_scale = 63.f/max_scale;
        }
        float inv_min = 0.f;
        if (max_min > 0) {
            inv_min = 63.f/max_min;
        }

        // Groups 0..3 store their 6-bit scale/min directly in bytes 0..3 / 4..7.
        // Groups 4..7 put the low nibbles in bytes 8..11 and the upper 2 bits into the
        // top two bits of bytes 0..3 (scale) and 4..7 (min).
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t ls = nearest_int(inv_scale*scales[j]);
            uint8_t lm = nearest_int(inv_min*mins[j]);
            ls = MIN(63, ls);
            lm = MIN(63, lm);
            if (j >= 4) {
                blk->scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
                blk->scales[j-4] |= ((ls >> 4) << 6);
                blk->scales[j-0] |= ((lm >> 4) << 6);
            } else {
                blk->scales[j] = ls;
                blk->scales[j+4] = lm;
            }
        }
        blk->d = GGML_FP32_TO_FP16(max_scale/63.f);
        blk->dmin = GGML_FP32_TO_FP16(max_min/63.f);

        // re-quantize against the values that were actually stored
        const float d_all = GGML_FP16_TO_FP32(blk->d);
        const float m_all = GGML_FP16_TO_FP32(blk->dmin);
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t sc, m;
            get_scale_min_k4(j, blk->scales, &sc, &m);
            const float d = d_all * sc;
            if (!d) {
                continue; // zero scale: keep the levels from make_qkx2_quants
            }
            const float dm = m_all * m;
            for (int ii = 0; ii < 32; ++ii) {
                int l = nearest_int((x[32*j + ii] + dm)/d);
                l = MAX(0, MIN(15, l));
                L[32*j + ii] = l;
            }
        }

        // each run of 64 levels becomes 32 bytes: first 32 in the low nibbles, next 32 in the high
        for (int base = 0; base < QK_K; base += 64) {
            uint8_t * q = blk->qs + base/2;
            for (int l = 0; l < 32; ++l) {
                q[l] = L[base + l] | (L[base + l + 32] << 4);
            }
        }
    }
}
1351
1352
0
// Expands k/QK_K Q4_K blocks from x into k floats at y (k must be a multiple of QK_K).
// Each run of 64 outputs uses two (scale, min) pairs: the low nibbles of 32 bytes give the
// first 32 values, the high nibbles the next 32; every value is d*sc*q - dmin*m.
void dequantize_row_q4_K(const block_q4_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int nb = k / QK_K;

    float * out = y;
    for (int ib = 0; ib < nb; ++ib) {
        const block_q4_K * blk = &x[ib];

        const float d   = GGML_FP16_TO_FP32(blk->d);
        const float min = GGML_FP16_TO_FP32(blk->dmin);

        for (int g = 0; g < QK_K/64; ++g) {
            const uint8_t * q = blk->qs + 32*g;
            uint8_t sc, m;

            get_scale_min_k4(2*g + 0, blk->scales, &sc, &m);
            const float d1 = d * sc;
            const float m1 = min * m;
            get_scale_min_k4(2*g + 1, blk->scales, &sc, &m);
            const float d2 = d * sc;
            const float m2 = min * m;

            for (int l = 0; l < 32; ++l) {
                *out++ = d1 * (q[l] & 0xF) - m1;
            }
            for (int l = 0; l < 32; ++l) {
                *out++ = d2 * (q[l]  >> 4) - m2;
            }
        }
    }
}
1375
1376
0
// Weighted Q4_K quantization of one row of n_per_row floats (a multiple of QK_K).
//
//   x             input row
//   y             output, n_per_row/QK_K blocks
//   quant_weights optional; read at the same position within the row as x
//
// Per block:
//   - element weights are qw * sqrt(sigma2 + x^2) when quant_weights is given
//     (sigma2 = 2 * mean of x^2 over the block), otherwise sqrt(sigma2) + |x|
//   - every group of 32 values is quantized to 4-bit levels with a (scale, min) pair by
//     make_qkx3_quants
//   - the 8 scales and 8 mins are quantized to 6 bits by make_qp_quants, weighted by the
//     per-group weight sums, and packed into the 12 scale bytes
//   - the values are re-quantized against the scales/mins as stored, two levels per byte
static void quantize_row_q4_K_impl(const float * GGML_RESTRICT x, block_q4_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    assert(n_per_row % QK_K == 0);
    const int64_t nb = n_per_row / QK_K;

    uint8_t L[QK_K];
    uint8_t Laux[32];
    uint8_t Ls[QK_K/32];
    uint8_t Lm[QK_K/32];
    float   weights[32];
    float   sw[QK_K/32];
    float   mins[QK_K/32];
    float   scales[QK_K/32];

    for (int i = 0; i < nb; i++) {

        float sum_x2 = 0;
        for (int l = 0; l < QK_K; ++l) sum_x2 += x[l] * x[l];
        float sigma2 = 2*sum_x2/QK_K;
        float av_x = sqrtf(sigma2);

        for (int j = 0; j < QK_K/32; ++j) {
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*i + 32*j;
                for (int l = 0; l < 32; ++l) weights[l] = qw[l] * sqrtf(sigma2 + x[32*j + l]*x[32*j + l]);
            } else {
                for (int l = 0; l < 32; ++l) weights[l] = av_x + fabsf(x[32*j + l]);
            }
            float sumw = 0;
            for (int l = 0; l < 32; ++l) sumw += weights[l];
            sw[j] = sumw;
            scales[j] = make_qkx3_quants(32, 15, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
        }

        float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw);
        float m_block = make_qp_quants(QK_K/32, 63, mins,   Lm, sw);
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t ls = Ls[j];
            uint8_t lm = Lm[j];
            // Keep every level inside its 6-bit field, as quantize_row_q5_K_impl does:
            // the top two bits of bytes 0..7 carry the upper bits of groups 4..7, so an
            // out-of-range level here would corrupt another group's scale or min.
            ls = MIN(63, ls);
            lm = MIN(63, lm);
            if (j < 4) {
                y[i].scales[j] = ls;
                y[i].scales[j+4] = lm;
            } else {
                y[i].scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
                y[i].scales[j-4] |= ((ls >> 4) << 6);
                y[i].scales[j-0] |= ((lm >> 4) << 6);
            }
        }
        y[i].d = GGML_FP32_TO_FP16(d_block);
        y[i].dmin = GGML_FP32_TO_FP16(m_block);

        // re-quantize against the values that were actually stored
        uint8_t sc, m;
        for (int j = 0; j < QK_K/32; ++j) {
            get_scale_min_k4(j, y[i].scales, &sc, &m);
            const float d = GGML_FP16_TO_FP32(y[i].d) * sc;
            if (!d) continue; // zero scale: keep the levels from make_qkx3_quants
            const float dm = GGML_FP16_TO_FP32(y[i].dmin) * m;
            for (int ii = 0; ii < 32; ++ii) {
                int l = nearest_int((x[32*j + ii] + dm)/d);
                l = MAX(0, MIN(15, l));
                L[32*j + ii] = l;
            }
        }

        // each run of 64 levels becomes 32 bytes: first 32 in the low nibbles, next 32 in the high
        uint8_t * q = y[i].qs;
        for (int j = 0; j < QK_K; j += 64) {
            for (int l = 0; l < 32; ++l) q[l] = L[j + l] | (L[j + l + 32] << 4);
            q += 32;
        }

        x += QK_K;

    }
}
1448
1449
0
// Quantizes nrow rows of n_per_row floats from src into Q4_K blocks at dst.
// Without quant_weights the reference quantizer processes all rows in a single call;
// with quant_weights each row goes through the weighted quantizer, and the same
// quant_weights pointer is passed for every row.
// Returns the number of bytes covered in dst (nrow * row size).
size_t quantize_q4_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const size_t row_size = ggml_row_size(GGML_TYPE_Q4_K, n_per_row);

    if (quant_weights == NULL) {
        quantize_row_q4_K_ref(src, dst, (int64_t)nrow*n_per_row);
        return nrow * row_size;
    }

    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r) {
        quantize_row_q4_K_impl(src, (block_q4_K*)out, n_per_row, quant_weights);
        src += n_per_row;
        out += row_size;
    }
    return nrow * row_size;
}
1464
1465
// ====================== 5-bit (de)-quantization
1466
1467
0
// Reference Q5_K quantization of k floats (k must be a multiple of QK_K) into k/QK_K blocks.
//
// Per block:
//   - every group of 32 values is quantized to 5-bit levels with a (scale, min) pair by
//     make_qkx2_quants, using weights av_x + |x| (av_x = RMS of the group)
//   - scales and mins are quantized to 6 bits relative to their block maxima and packed
//     into the 12 scale bytes; d and dmin store max/63 as fp16
//   - the values are re-quantized against the scales/mins as stored
//   - the low 4 bits of each level go to qs (two per byte), the fifth bit to qh
void quantize_row_q5_K_ref(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    uint8_t L[QK_K];
    float mins[QK_K/32];
    float scales[QK_K/32];
    float weights[32];
    uint8_t Laux[32];

    for (int ib = 0; ib < nb; ++ib, x += QK_K) {
        block_q5_K * blk = &y[ib];

        // the min is subtracted before scaling, so only positive maxima are tracked
        float max_scale = 0;
        float max_min   = 0;
        for (int j = 0; j < QK_K/32; ++j) {
            const float * xs = x + 32*j;
            float sum_x2 = 0;
            for (int l = 0; l < 32; ++l) {
                sum_x2 += xs[l] * xs[l];
            }
            const float av_x = sqrtf(sum_x2/32);
            for (int l = 0; l < 32; ++l) {
                weights[l] = av_x + fabsf(xs[l]);
            }
            scales[j] = make_qkx2_quants(32, 31, xs, weights, L + 32*j, &mins[j], Laux, -0.5f, 0.1f, 15, false);
            if (scales[j] > max_scale) {
                max_scale = scales[j];
            }
            if (mins[j] > max_min) {
                max_min = mins[j];
            }
        }

        float inv_scale = 0.f;
        if (max_scale > 0) {
            inv_scale = 63.f/max_scale;
        }
        float inv_min = 0.f;
        if (max_min > 0) {
            inv_min = 63.f/max_min;
        }

        // Groups 0..3 store their 6-bit scale/min directly in bytes 0..3 / 4..7.
        // Groups 4..7 put the low nibbles in bytes 8..11 and the upper 2 bits into the
        // top two bits of bytes 0..3 (scale) and 4..7 (min).
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t ls = nearest_int(inv_scale*scales[j]);
            uint8_t lm = nearest_int(inv_min*mins[j]);
            ls = MIN(63, ls);
            lm = MIN(63, lm);
            if (j >= 4) {
                blk->scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
                blk->scales[j-4] |= ((ls >> 4) << 6);
                blk->scales[j-0] |= ((lm >> 4) << 6);
            } else {
                blk->scales[j] = ls;
                blk->scales[j+4] = lm;
            }
        }
        blk->d = GGML_FP32_TO_FP16(max_scale/63.f);
        blk->dmin = GGML_FP32_TO_FP16(max_min/63.f);

        // re-quantize against the values that were actually stored
        const float d_all = GGML_FP16_TO_FP32(blk->d);
        const float m_all = GGML_FP16_TO_FP32(blk->dmin);
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t sc, m;
            get_scale_min_k4(j, blk->scales, &sc, &m);
            const float d = d_all * sc;
            if (!d) {
                continue; // zero scale: keep the levels from make_qkx2_quants
            }
            const float dm = m_all * m;
            for (int ii = 0; ii < 32; ++ii) {
                int l = nearest_int((x[32*j + ii] + dm)/d);
                l = MAX(0, MIN(31, l));
                L[32*j + ii] = l;
            }
        }

        uint8_t * GGML_RESTRICT qh = blk->qh;
        memset(qh, 0, QK_K/8);

        // Run g of 64 levels: its first 32 levels use bit 2g of qh[j], its next 32 use
        // bit 2g+1; the remaining low bits are packed two per byte into qs.
        for (int g = 0; g < QK_K/64; ++g) {
            const uint8_t * lv = L + 64*g;
            uint8_t * ql = blk->qs + 32*g;
            const uint8_t bit_lo = (uint8_t)(1 << (2*g));
            const uint8_t bit_hi = (uint8_t)(2 << (2*g));
            for (int j = 0; j < 32; ++j) {
                int l1 = lv[j];
                int l2 = lv[j + 32];
                if (l1 > 15) {
                    l1 -= 16;
                    qh[j] |= bit_lo;
                }
                if (l2 > 15) {
                    l2 -= 16;
                    qh[j] |= bit_hi;
                }
                ql[j] = l1 | (l2 << 4);
            }
        }
    }
}
1553
1554
0
// Expands k/QK_K Q5_K blocks from x into k floats at y (k must be a multiple of QK_K).
// Each run of 64 outputs uses two (scale, min) pairs. A level is the 4-bit nibble from qs
// plus 16 when its bit in qh is set; every value is d*sc*level - dmin*m.
void dequantize_row_q5_K(const block_q5_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    float * out = y;
    for (int ib = 0; ib < nb; ++ib) {
        const block_q5_K * blk = &x[ib];
        const uint8_t * qh = blk->qh;

        const float d   = GGML_FP16_TO_FP32(blk->d);
        const float min = GGML_FP16_TO_FP32(blk->dmin);

        for (int g = 0; g < QK_K/64; ++g) {
            const uint8_t * ql = blk->qs + 32*g;
            // run g owns bits 2g (first 32 values) and 2g+1 (next 32 values) of every qh byte
            const uint8_t u1 = (uint8_t)(1 << (2*g));
            const uint8_t u2 = (uint8_t)(2 << (2*g));
            uint8_t sc, m;

            get_scale_min_k4(2*g + 0, blk->scales, &sc, &m);
            const float d1 = d * sc;
            const float m1 = min * m;
            get_scale_min_k4(2*g + 1, blk->scales, &sc, &m);
            const float d2 = d * sc;
            const float m2 = min * m;

            for (int l = 0; l < 32; ++l) {
                *out++ = d1 * ((ql[l] & 0xF) + (qh[l] & u1 ? 16 : 0)) - m1;
            }
            for (int l = 0; l < 32; ++l) {
                *out++ = d2 * ((ql[l]  >> 4) + (qh[l] & u2 ? 16 : 0)) - m2;
            }
        }
    }
}
1580
1581
0
// Weighted Q5_K quantization of one row of n_per_row floats (a multiple of QK_K).
//
// Per block:
//   - element weights are qw * sqrt(sigma2 + x^2) when quant_weights is given
//     (sigma2 = 2 * mean of x^2 over the block), otherwise sqrt(sigma2) + |x|
//   - every group of 32 values is quantized to 5-bit levels with a (scale, min) pair by
//     make_qkx3_quants
//   - the 8 scales and 8 mins are quantized to 6 bits by make_qp_quants, weighted by the
//     per-group weight sums, and packed into the 12 scale bytes
//   - the values are re-quantized against the scales/mins as stored
//   - the low 4 bits of each level go to qs (two per byte), the fifth bit to qh
static void quantize_row_q5_K_impl(const float * GGML_RESTRICT x, block_q5_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    assert(n_per_row % QK_K == 0);
    const int64_t nb = n_per_row / QK_K;

    uint8_t L[QK_K];
    uint8_t Laux[32];
    uint8_t Ls[QK_K/32];
    uint8_t Lm[QK_K/32];
    float   mins[QK_K/32];
    float   scales[QK_K/32];
    float   sw[QK_K/32];
    float   weights[32];

    for (int ib = 0; ib < nb; ++ib, x += QK_K) {
        block_q5_K * blk = &y[ib];

        float sum_x2 = 0;
        for (int l = 0; l < QK_K; ++l) {
            sum_x2 += x[l] * x[l];
        }
        const float sigma2 = 2*sum_x2/QK_K;
        const float av_x = sqrtf(sigma2);

        for (int j = 0; j < QK_K/32; ++j) {
            if (!quant_weights) {
                for (int l = 0; l < 32; ++l) {
                    weights[l] = av_x + fabsf(x[32*j + l]);
                }
            } else {
                const float * qw = quant_weights + QK_K*ib + 32*j;
                for (int l = 0; l < 32; ++l) {
                    weights[l] = qw[l] * sqrtf(sigma2 + x[32*j + l]*x[32*j + l]);
                }
            }
            float sumw = 0;
            for (int l = 0; l < 32; ++l) {
                sumw += weights[l];
            }
            sw[j] = sumw;

            scales[j] = make_qkx3_quants(32, 31, x + 32*j, weights, L + 32*j, &mins[j], Laux, -0.9f, 0.05f, 36, false);
        }

        const float d_block = make_qp_quants(QK_K/32, 63, scales, Ls, sw);
        const float m_block = make_qp_quants(QK_K/32, 63, mins,   Lm, sw);

        // Groups 0..3 store their 6-bit scale/min directly in bytes 0..3 / 4..7.
        // Groups 4..7 put the low nibbles in bytes 8..11 and the upper 2 bits into the
        // top two bits of bytes 0..3 (scale) and 4..7 (min).
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t ls = Ls[j];
            uint8_t lm = Lm[j];
            ls = MIN(63, ls);
            lm = MIN(63, lm);
            if (j >= 4) {
                blk->scales[j+4] = (ls & 0xF) | ((lm & 0xF) << 4);
                blk->scales[j-4] |= ((ls >> 4) << 6);
                blk->scales[j-0] |= ((lm >> 4) << 6);
            } else {
                blk->scales[j] = ls;
                blk->scales[j+4] = lm;
            }
        }
        blk->d = GGML_FP32_TO_FP16(d_block);
        blk->dmin = GGML_FP32_TO_FP16(m_block);

        // re-quantize against the values that were actually stored
        const float d_all = GGML_FP16_TO_FP32(blk->d);
        const float m_all = GGML_FP16_TO_FP32(blk->dmin);
        for (int j = 0; j < QK_K/32; ++j) {
            uint8_t sc, m;
            get_scale_min_k4(j, blk->scales, &sc, &m);
            const float d = d_all * sc;
            if (!d) {
                continue; // zero scale: keep the levels from make_qkx3_quants
            }
            const float dm = m_all * m;
            for (int ii = 0; ii < 32; ++ii) {
                int l = nearest_int((x[32*j + ii] + dm)/d);
                l = MAX(0, MIN(31, l));
                L[32*j + ii] = l;
            }
        }

        uint8_t * GGML_RESTRICT qh = blk->qh;
        memset(qh, 0, QK_K/8);

        // Run g of 64 levels: its first 32 levels use bit 2g of qh[j], its next 32 use
        // bit 2g+1; the remaining low bits are packed two per byte into qs.
        for (int g = 0; g < QK_K/64; ++g) {
            const uint8_t * lv = L + 64*g;
            uint8_t * ql = blk->qs + 32*g;
            const uint8_t bit_lo = (uint8_t)(1 << (2*g));
            const uint8_t bit_hi = (uint8_t)(2 << (2*g));
            for (int j = 0; j < 32; ++j) {
                int l1 = lv[j];
                int l2 = lv[j + 32];
                if (l1 > 15) {
                    l1 -= 16;
                    qh[j] |= bit_lo;
                }
                if (l2 > 15) {
                    l2 -= 16;
                    qh[j] |= bit_hi;
                }
                ql[j] = l1 | (l2 << 4);
            }
        }
    }
}
1673
1674
0
// Quantizes nrow rows of n_per_row floats from src into Q5_K blocks at dst.
// Without quant_weights the reference quantizer processes all rows in a single call;
// with quant_weights each row goes through the weighted quantizer, and the same
// quant_weights pointer is passed for every row.
// Returns the number of bytes covered in dst (nrow * row size).
size_t quantize_q5_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const size_t row_size = ggml_row_size(GGML_TYPE_Q5_K, n_per_row);

    if (quant_weights == NULL) {
        quantize_row_q5_K_ref(src, dst, (int64_t)nrow*n_per_row);
        return nrow * row_size;
    }

    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r) {
        quantize_row_q5_K_impl(src, (block_q5_K*)out, n_per_row, quant_weights);
        src += n_per_row;
        out += row_size;
    }
    return nrow * row_size;
}
1689
1690
// ====================== 6-bit (de)-quantization
1691
1692
0
// Reference Q6_K quantizer: converts k floats from x into k/QK_K block_q6_K entries at y.
// k must be a multiple of QK_K.
//
// Per block of QK_K values:
//   1. make_qx_quants() is run on every group of 16 values to get a per-group scale;
//   2. the group scale with the largest magnitude defines the fp16 block scale d, and
//      each group scale is stored as an int8 multiple of d in y[i].scales;
//   3. each value is re-quantized against the stored (rounded) scale, clamped to
//      [-32, 31] and shifted by +32;
//   4. the low 4 bits of each level go to ql, the remaining high bits to qh.
void quantize_row_q6_K_ref(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    int8_t L[QK_K];
    float   scales[QK_K/16];

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        float max_scale = 0;
        float max_abs_scale = 0;

        // per-group scales; remember the one with the largest magnitude (sign preserved)
        for (int ib = 0; ib < QK_K/16; ++ib) {

            const float scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, NULL);
            scales[ib] = scale;

            const float abs_scale = fabsf(scale);
            if (abs_scale > max_abs_scale) {
                max_abs_scale = abs_scale;
                max_scale = scale;
            }

        }

        // all group scales are (almost) zero: emit an all-zero block
        if (max_abs_scale < GROUP_MAX_EPS) {
            memset(&y[i], 0, sizeof(block_q6_K));
            y[i].d = GGML_FP32_TO_FP16(0.f);
            x += QK_K;
            continue;
        }

        // the dominant group scale maps to -128; the opposite extreme is capped at 127
        float iscale = -128.f/max_scale;
        y[i].d = GGML_FP32_TO_FP16(1/iscale);
        for (int ib = 0; ib < QK_K/16; ++ib) {
            y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib]));
        }

        // re-quantize with the scales as they will be seen after dequantization
        for (int j = 0; j < QK_K/16; ++j) {
            float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
            if (!d) {
                // zero effective scale: keep the levels make_qx_quants wrote into L
                continue;
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-32, MIN(31, l));
                L[16*j + ii] = l + 32;
            }
        }

        // pack 128 levels at a time: 64 bytes of low nibbles + 32 bytes of high bits
        uint8_t * GGML_RESTRICT ql = y[i].ql;
        uint8_t * GGML_RESTRICT qh = y[i].qh;
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                const uint8_t q1 = L[j + l +  0] & 0xF;
                const uint8_t q2 = L[j + l + 32] & 0xF;
                const uint8_t q3 = L[j + l + 64] & 0xF;
                const uint8_t q4 = L[j + l + 96] & 0xF;
                ql[l+ 0] = q1 | (q3 << 4);
                ql[l+32] = q2 | (q4 << 4);
                qh[l] = (L[j + l] >> 4) | ((L[j + l + 32] >> 4) << 2) | ((L[j + l + 64] >> 4) << 4) | ((L[j + l + 96] >> 4) << 6);
            }
            ql += 64;
            qh += 32;
        }

        x += QK_K;
    }
}
1761
1762
0
// Expands k/QK_K block_q6_K entries from x into k floats at y. k must be a multiple of QK_K.
//
// Each value is rebuilt from a 4-bit nibble in ql and a 2-bit field in qh, shifted by -32,
// and multiplied by the fp16 block scale d and the int8 scale of its group of 16.
// Values are produced 128 at a time from 64 bytes of ql, 32 bytes of qh and 8 scales.
void dequantize_row_q6_K(const block_q6_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {
        const float d = GGML_FP16_TO_FP32(x[i].d);

        const uint8_t * GGML_RESTRICT ql = x[i].ql;
        const uint8_t * GGML_RESTRICT qh = x[i].qh;
        const int8_t  * GGML_RESTRICT sc = x[i].scales;

        for (int n = 0; n < QK_K; n += 128) {
            for (int l = 0; l < 32; ++l) {
                // l in [0,16) uses scales 0/2/4/6, l in [16,32) uses scales 1/3/5/7
                int is = l/16;
                const int8_t q1 = (int8_t)((ql[l +  0] & 0xF) | (((qh[l] >> 0) & 3) << 4)) - 32;
                const int8_t q2 = (int8_t)((ql[l + 32] & 0xF) | (((qh[l] >> 2) & 3) << 4)) - 32;
                const int8_t q3 = (int8_t)((ql[l +  0]  >> 4) | (((qh[l] >> 4) & 3) << 4)) - 32;
                const int8_t q4 = (int8_t)((ql[l + 32]  >> 4) | (((qh[l] >> 6) & 3) << 4)) - 32;
                y[l +  0] = d * sc[is + 0] * q1;
                y[l + 32] = d * sc[is + 2] * q2;
                y[l + 64] = d * sc[is + 4] * q3;
                y[l + 96] = d * sc[is + 6] * q4;
            }
            y  += 128;
            ql += 64;
            qh += 32;
            sc += 8;
        }
    }
}
1792
1793
0
// Q6_K quantizer for one row of n_per_row floats with optional importance weights.
// n_per_row must be a multiple of QK_K. When quant_weights is non-NULL it is indexed
// by the element's position within the row and passed to make_qx_quants() for each
// group of 16 values; when NULL, make_qx_quants() is called without weights.
//
// Scale selection, re-quantization and bit packing follow the same steps as
// quantize_row_q6_K_ref.
static void quantize_row_q6_K_impl(const float * GGML_RESTRICT x, block_q6_K * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    assert(n_per_row % QK_K == 0);
    const int64_t nb = n_per_row / QK_K;

    int8_t L[QK_K];
    float   scales[QK_K/16];

    // the counter is int64_t to match nb; this also keeps QK_K*i below in 64-bit arithmetic
    for (int64_t i = 0; i < nb; i++) {

        float max_scale = 0;
        float max_abs_scale = 0;

        // per-group scales; remember the one with the largest magnitude (sign preserved)
        for (int ib = 0; ib < QK_K/16; ++ib) {

            // weights for this group of 16, or NULL for the unweighted search
            const float * qw = quant_weights ? quant_weights + QK_K*i + 16*ib : NULL;
            const float scale = make_qx_quants(16, 32, x + 16*ib, L + 16*ib, 1, qw);
            scales[ib] = scale;

            const float abs_scale = fabsf(scale);
            if (abs_scale > max_abs_scale) {
                max_abs_scale = abs_scale;
                max_scale = scale;
            }

        }

        // all group scales are (almost) zero: emit an all-zero block
        if (max_abs_scale < GROUP_MAX_EPS) {
            memset(&y[i], 0, sizeof(block_q6_K));
            y[i].d = GGML_FP32_TO_FP16(0.f);
            x += QK_K;
            continue;
        }

        // the dominant group scale maps to -128; the opposite extreme is capped at 127
        float iscale = -128.f/max_scale;
        y[i].d = GGML_FP32_TO_FP16(1/iscale);
        for (int ib = 0; ib < QK_K/16; ++ib) {
            y[i].scales[ib] = MIN(127, nearest_int(iscale*scales[ib]));
        }

        // re-quantize with the scales as they will be seen after dequantization
        for (int j = 0; j < QK_K/16; ++j) {
            float d = GGML_FP16_TO_FP32(y[i].d) * y[i].scales[j];
            if (!d) {
                // zero effective scale: keep the levels make_qx_quants wrote into L
                continue;
            }
            for (int ii = 0; ii < 16; ++ii) {
                int l = nearest_int(x[16*j + ii]/d);
                l = MAX(-32, MIN(31, l));
                L[16*j + ii] = l + 32;
            }
        }

        // pack 128 levels at a time: 64 bytes of low nibbles + 32 bytes of high bits
        uint8_t * GGML_RESTRICT ql = y[i].ql;
        uint8_t * GGML_RESTRICT qh = y[i].qh;
        for (int j = 0; j < QK_K; j += 128) {
            for (int l = 0; l < 32; ++l) {
                const uint8_t q1 = L[j + l +  0] & 0xF;
                const uint8_t q2 = L[j + l + 32] & 0xF;
                const uint8_t q3 = L[j + l + 64] & 0xF;
                const uint8_t q4 = L[j + l + 96] & 0xF;
                ql[l+ 0] = q1 | (q3 << 4);
                ql[l+32] = q2 | (q4 << 4);
                qh[l] = (L[j + l] >> 4) | ((L[j + l + 32] >> 4) << 2) | ((L[j + l + 64] >> 4) << 4) | ((L[j + l + 96] >> 4) << 6);
            }
            ql += 64;
            qh += 32;
        }

        x += QK_K;

    }
}
1876
1877
0
// Quantizes nrow rows of n_per_row floats from src into Q6_K data at dst.
// With quant_weights, every row goes through quantize_row_q6_K_impl separately;
// without, the whole buffer is handed to quantize_row_q6_K_ref in a single call.
// Returns nrow times the Q6_K row size reported by ggml_row_size().
size_t quantize_q6_K(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    const size_t bytes_per_row = ggml_row_size(GGML_TYPE_Q6_K, n_per_row);

    if (quant_weights) {
        const float * in  = src;
        char        * out = (char *)dst;
        // the same quant_weights are applied to every row
        for (int64_t r = 0; r < nrow; ++r, in += n_per_row, out += bytes_per_row) {
            quantize_row_q6_K_impl(in, (block_q6_K*)out, n_per_row, quant_weights);
        }
    } else {
        quantize_row_q6_K_ref(src, dst, (int64_t)nrow*n_per_row);
    }

    return nrow * bytes_per_row;
}
1892
1893
0
// Q4_0 quantizer for one row of n_per_row floats with importance weights.
// Falls back to quantize_row_q4_0_ref when quant_weights is NULL.
//
// Each block of QK4_0 values is quantized by make_qx_quants() using the weights
//   weight[j] = quant_weights[j] * sqrtf(sigma2 + x[j]^2),
// where sigma2 is the mean of x^2 over the whole row. The returned scale is stored
// as fp16 and the 32 levels are packed two per byte (L[j] low nibble, L[j+16] high).
static void quantize_row_q4_0_impl(const float * GGML_RESTRICT x, block_q4_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    static_assert(QK4_0 == 32, "QK4_0 must be 32");

    if (!quant_weights) {
        quantize_row_q4_0_ref(x, y, n_per_row);
        return;
    }

    float weight[QK4_0];
    int8_t L[QK4_0];

    // counters are int64_t to match n_per_row / nb (an int counter would overflow on huge rows)
    float sum_x2 = 0;
    for (int64_t j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
    float sigma2 = sum_x2/n_per_row;

    const int64_t nb = n_per_row/QK4_0;
    for (int64_t ib = 0; ib < nb; ++ib) {
        const float * xb = x + QK4_0 * ib;
        const float * qw = quant_weights + QK4_0 * ib;
        for (int j = 0; j < QK4_0; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
        float d = make_qx_quants(QK4_0, 8, xb, L, 1, weight);
        y[ib].d = GGML_FP32_TO_FP16(d);
        for (int j = 0; j < 16; ++j) {
            y[ib].qs[j] = L[j] | (L[j+16] << 4);
        }
    }
}
1920
1921
0
// Quantizes nrow rows of n_per_row floats from src into Q4_0 data at dst.
// With quant_weights, rows are quantized one at a time by quantize_row_q4_0_impl;
// otherwise the whole buffer goes through quantize_row_q4_0_ref in one call.
// Returns nrow times the Q4_0 row size reported by ggml_row_size().
size_t quantize_q4_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    if (quant_weights) {
        const size_t bytes_per_row = ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
        const float * in  = src;
        char        * out = (char *)dst;
        // the same quant_weights are applied to every row
        for (int64_t r = 0; r < nrow; ++r, in += n_per_row, out += bytes_per_row) {
            quantize_row_q4_0_impl(in, (block_q4_0*)out, n_per_row, quant_weights);
        }
        return nrow * bytes_per_row;
    }

    // unweighted path: one reference call over all elements
    quantize_row_q4_0_ref(src, dst, (int64_t)nrow*n_per_row);
    return nrow * ggml_row_size(GGML_TYPE_Q4_0, n_per_row);
}
1935
1936
0
// Q4_1 quantizer for one row of n_per_row floats with importance weights.
// Falls back to quantize_row_q4_1_ref when quant_weights is NULL.
//
// Each block of QK4_1 values is quantized by make_qkx3_quants() (levels 0..15) using
//   weight[j] = quant_weights[j] * sqrtf(sigma2 + x[j]^2),
// where sigma2 is the mean of x^2 over the whole row. The returned scale goes to d,
// the negated `min` output goes to m, and the levels are packed two per byte
// (L[j] low nibble, L[j+16] high nibble).
static void quantize_row_q4_1_impl(const float * GGML_RESTRICT x, block_q4_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    static_assert(QK4_1 == 32, "QK4_1 must be 32");

    if (!quant_weights) {
        quantize_row_q4_1_ref(x, y, n_per_row);
        return;
    }

    float weight[QK4_1];
    uint8_t L[QK4_1], Laux[QK4_1];

    // counters are int64_t to match n_per_row / nb (an int counter would overflow on huge rows)
    float sum_x2 = 0;
    for (int64_t j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
    float sigma2 = sum_x2/n_per_row;

    const int64_t nb = n_per_row/QK4_1;
    for (int64_t ib = 0; ib < nb; ++ib) {
        const float * xb = x + QK4_1 * ib;
        const float * qw = quant_weights + QK4_1 * ib;
        for (int j = 0; j < QK4_1; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
        float min;
        float d = make_qkx3_quants(QK4_1, 15, xb, weight, L, &min, Laux, -0.9f, 0.05f, 36, false);
        y[ib].d = GGML_FP32_TO_FP16(d);
        y[ib].m = GGML_FP32_TO_FP16(-min);
        for (int j = 0; j < 16; ++j) {
            y[ib].qs[j] = L[j] | (L[j+16] << 4);
        }
    }
}
1965
1966
0
// Quantizes nrow rows of n_per_row floats from src into Q4_1 data at dst.
// With quant_weights, rows are quantized one at a time by quantize_row_q4_1_impl;
// otherwise the whole buffer goes through quantize_row_q4_1_ref in one call.
// Returns nrow times the Q4_1 row size reported by ggml_row_size().
size_t quantize_q4_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    if (quant_weights) {
        const size_t bytes_per_row = ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
        const float * in  = src;
        char        * out = (char *)dst;
        // the same quant_weights are applied to every row
        for (int64_t r = 0; r < nrow; ++r, in += n_per_row, out += bytes_per_row) {
            quantize_row_q4_1_impl(in, (block_q4_1*)out, n_per_row, quant_weights);
        }
        return nrow * bytes_per_row;
    }

    // unweighted path: one reference call over all elements
    quantize_row_q4_1_ref(src, dst, (int64_t)nrow*n_per_row);
    return nrow * ggml_row_size(GGML_TYPE_Q4_1, n_per_row);
}
1980
1981
0
// Q5_0 quantizer for one row of n_per_row floats with importance weights.
// Falls back to quantize_row_q5_0_ref when quant_weights is NULL.
//
// Each block of QK5_0 values is quantized by make_qx_quants() using
//   weight[j] = quant_weights[j] * sqrtf(sigma2 + x[j]^2),
// where sigma2 is the mean of x^2 over the whole row. The low 4 bits of each level
// are packed two per byte into qs; bit 4 of each level is collected into the 32-bit qh.
static void quantize_row_q5_0_impl(const float * GGML_RESTRICT x, block_q5_0 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    static_assert(QK5_0 == 32, "QK5_0 must be 32");

    if (!quant_weights) {
        quantize_row_q5_0_ref(x, y, n_per_row);
        return;
    }

    float weight[QK5_0];
    int8_t L[QK5_0];

    // counters are int64_t to match n_per_row / nb (an int counter would overflow on huge rows)
    float sum_x2 = 0;
    for (int64_t j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
    float sigma2 = sum_x2/n_per_row;

    const int64_t nb = n_per_row/QK5_0;
    for (int64_t ib = 0; ib < nb; ++ib) {
        const float * xb = x + QK5_0 * ib;
        const float * qw = quant_weights + QK5_0 * ib;
        for (int j = 0; j < QK5_0; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
        float d = make_qx_quants(QK5_0, 16, xb, L, 1, weight);
        y[ib].d = GGML_FP32_TO_FP16(d);

        uint32_t qh = 0;

        for (int j = 0; j < 16; ++j) {
            const uint8_t xi0 = L[j];
            const uint8_t xi1 = L[j+16];
            y[ib].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);

            // get the 5-th bit and store it in qh at the right position
            qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_0/2);
        }

        // memcpy avoids any alignment/aliasing assumption about the qh field
        memcpy(&y[ib].qh, &qh, sizeof(qh));
    }
}
2019
2020
0
// Quantizes nrow rows of n_per_row floats from src into Q5_0 data at dst.
// With quant_weights, rows are quantized one at a time by quantize_row_q5_0_impl;
// otherwise the whole buffer goes through quantize_row_q5_0_ref in one call.
// Returns nrow times the Q5_0 row size reported by ggml_row_size().
size_t quantize_q5_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    if (quant_weights) {
        const size_t bytes_per_row = ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
        const float * in  = src;
        char        * out = (char *)dst;
        // the same quant_weights are applied to every row
        for (int64_t r = 0; r < nrow; ++r, in += n_per_row, out += bytes_per_row) {
            quantize_row_q5_0_impl(in, (block_q5_0*)out, n_per_row, quant_weights);
        }
        return nrow * bytes_per_row;
    }

    // unweighted path: one reference call over all elements
    quantize_row_q5_0_ref(src, dst, (int64_t)nrow*n_per_row);
    return nrow * ggml_row_size(GGML_TYPE_Q5_0, n_per_row);
}
2034
2035
0
// Q5_1 quantizer for one row of n_per_row floats with importance weights.
// Falls back to quantize_row_q5_1_ref when quant_weights is NULL.
//
// Each block of QK5_1 values is quantized by make_qkx3_quants() (levels 0..31) using
//   weight[j] = quant_weights[j] * sqrtf(sigma2 + x[j]^2),
// where sigma2 is the mean of x^2 over the whole row. The returned scale goes to d,
// the negated `min` output goes to m, the low 4 bits of each level are packed two per
// byte into qs, and bit 4 of each level is collected into the 32-bit qh.
static void quantize_row_q5_1_impl(const float * GGML_RESTRICT x, block_q5_1 * GGML_RESTRICT y, int64_t n_per_row, const float * quant_weights) {
    static_assert(QK5_1 == 32, "QK5_1 must be 32");

    if (!quant_weights) {
        quantize_row_q5_1_ref(x, y, n_per_row);
        return;
    }

    float weight[QK5_1];
    uint8_t L[QK5_1], Laux[QK5_1];

    // counters are int64_t to match n_per_row / nb (an int counter would overflow on huge rows)
    float sum_x2 = 0;
    for (int64_t j = 0; j < n_per_row; ++j) sum_x2 += x[j]*x[j];
    float sigma2 = sum_x2/n_per_row;

    const int64_t nb = n_per_row/QK5_1;
    for (int64_t ib = 0; ib < nb; ++ib) {
        const float * xb = x + QK5_1 * ib;
        const float * qw = quant_weights + QK5_1 * ib;
        for (int j = 0; j < QK5_1; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
        float min;
        float d = make_qkx3_quants(QK5_1, 31, xb, weight, L, &min, Laux, -0.9f, 0.05f, 36, false);
        y[ib].d = GGML_FP32_TO_FP16(d);
        y[ib].m = GGML_FP32_TO_FP16(-min);

        uint32_t qh = 0;
        for (int j = 0; j < 16; ++j) {
            const uint8_t xi0 = L[j];
            const uint8_t xi1 = L[j+16];
            y[ib].qs[j] = (xi0 & 0x0F) | ((xi1 & 0x0F) << 4);
            // get the 5-th bit and store it in qh at the right position
            // (QK5_1, not QK5_0: this is the Q5_1 block; the static_assert above pins it to 32)
            qh |= ((xi0 & 0x10u) >> 4) << (j + 0);
            qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_1/2);
        }
        // memcpy avoids any alignment/aliasing assumption about the qh field
        memcpy(&y[ib].qh, &qh, sizeof(qh));
    }
}
2072
2073
0
// Quantizes nrow rows of n_per_row floats from src into Q5_1 data at dst.
// With quant_weights, rows are quantized one at a time by quantize_row_q5_1_impl;
// otherwise the whole buffer goes through quantize_row_q5_1_ref in one call.
// Returns nrow times the Q5_1 row size reported by ggml_row_size().
size_t quantize_q5_1(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    if (quant_weights) {
        const size_t bytes_per_row = ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
        const float * in  = src;
        char        * out = (char *)dst;
        // the same quant_weights are applied to every row
        for (int64_t r = 0; r < nrow; ++r, in += n_per_row, out += bytes_per_row) {
            quantize_row_q5_1_impl(in, (block_q5_1*)out, n_per_row, quant_weights);
        }
        return nrow * bytes_per_row;
    }

    // unweighted path: one reference call over all elements
    quantize_row_q5_1_ref(src, dst, (int64_t)nrow*n_per_row);
    return nrow * ggml_row_size(GGML_TYPE_Q5_1, n_per_row);
}
2087
2088
0
// Quantizes nrow*n_per_row floats from src into Q8_0 data at dst with the reference
// quantizer. quant_weights is accepted for interface uniformity and ignored.
// Returns nrow times the Q8_0 row size reported by ggml_row_size().
size_t quantize_q8_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    (void)quant_weights; // not used

    const size_t  bytes_per_row = ggml_row_size(GGML_TYPE_Q8_0, n_per_row);
    const int64_t n_elements    = (int64_t)nrow*n_per_row;

    quantize_row_q8_0_ref(src, dst, n_elements);

    return nrow * bytes_per_row;
}
2094
2095
0
// Quantizes nrow*n_per_row floats from src into MXFP4 data at dst with the reference
// quantizer. quant_weights is accepted for interface uniformity and ignored.
// Returns nrow times the MXFP4 row size reported by ggml_row_size().
size_t quantize_mxfp4(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_UNUSED(quant_weights);

    const int64_t n_elements = (int64_t)nrow*n_per_row;
    quantize_row_mxfp4_ref(src, dst, n_elements);

    const size_t bytes_per_row = ggml_row_size(GGML_TYPE_MXFP4, n_per_row);
    return nrow * bytes_per_row;
}
2100
2101
// ====================== Ternary (de)-quantization (BitNet b1.58 and TriLMs)
2102
2103
0
// Reference TQ1_0 quantizer: converts k floats from x into k/QK_K block_tq1_0 entries at y.
// k must be a multiple of QK_K.
//
// The block scale d is the largest magnitude in the block. Each value is rounded to
// lroundf(x/d) + 1 and five such digits are combined base-3 into one byte of qs
// (four digits per byte for qh), then rescaled from a /243 to a /256 fraction with a
// ceiling division.
void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t n_blocks = k / QK_K;

    const size_t qs_size = sizeof(y->qs);
    const size_t qs_wide = qs_size - qs_size % 32; // part of qs handled in 32-byte chunks
    const size_t qh_size = sizeof(y->qh);

    for (int64_t ib = 0; ib < n_blocks; ib++) {
        float amax = 0.0f; // absolute max

        for (int e = 0; e < QK_K; e++) {
            const float v = x[e];
            amax = MAX(amax, fabsf(v));
        }

        const float d   = amax;
        const float inv = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        // 5 elements per byte, along 32 bytes
        for (size_t base = 0; base < qs_wide; base += 32) {
            for (size_t col = 0; col < 32; ++col) {
                uint8_t packed = 0;
                for (size_t t = 0; t < 5; ++t) {
                    const int digit = lroundf(x[col + t*32] * inv) + 1; // -1, 0, 1 -> 0, 1, 2
                    packed *= 3;
                    packed += digit;
                }
                // ceiling division (243 == pow(3, 5))
                packed = ((uint16_t)packed * 256 + (243 - 1)) / 243;
                y[ib].qs[base + col] = packed;
            }
            x += 5*32;
        }

        // remainder of qs, along 16 bytes
        for (size_t base = qs_wide; base < qs_size; base += 16) {
            for (size_t col = 0; col < 16; ++col) {
                uint8_t packed = 0;
                for (size_t t = 0; t < 5; ++t) {
                    const int digit = lroundf(x[col + t*16] * inv) + 1; // -1, 0, 1 -> 0, 1, 2
                    packed *= 3;
                    packed += digit;
                }
                // ceiling division (243 == pow(3, 5))
                packed = ((uint16_t)packed * 256 + (243 - 1)) / 243;
                y[ib].qs[base + col] = packed;
            }
            x += 5*16;
        }

        // 4 elements per byte
        for (size_t col = 0; col < qh_size; ++col) {
            uint8_t packed = 0;
            for (size_t t = 0; t < 4; ++t) {
                // -1, 0, 1 -> 0, 1, 2
                const int digit = lroundf(x[col + t*qh_size] * inv) + 1;
                packed *= 3;
                packed += digit;
            }
            // shift the first value to the most significant trit
            packed *= 3;
            // ceiling division (243 == pow(3, 5))
            packed = ((uint16_t)packed * 256 + (243 - 1)) / 243;
            y[ib].qh[col] = packed;
        }
        x += 4*qh_size;
    }
}
2168
2169
0
// Reference TQ2_0 quantizer: converts k floats from x into k/QK_K block_tq2_0 entries at y.
// k must be a multiple of QK_K.
//
// The block scale d is the largest magnitude in the block. Each value is rounded to
// lroundf(x/d) + 1 and stored in 2 bits; one byte of qs holds four values that are
// 32 positions apart in the input.
void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t n_blocks = k / QK_K;

    const size_t qs_size = sizeof(y->qs);

    for (int64_t ib = 0; ib < n_blocks; ib++) {
        float amax = 0.0f; // absolute max

        for (int e = 0; e < QK_K; e++) {
            const float v = x[e];
            amax = MAX(amax, fabsf(v));
        }

        const float d   = amax;
        const float inv = d ? 1.0f/d : 0.0f;

        y[ib].d = GGML_FP32_TO_FP16(d);

        for (size_t base = 0; base < qs_size; base += 32) {
            for (size_t col = 0; col < 32; ++col) {
                uint8_t packed = 0;
                for (size_t slot = 0; slot < 4; ++slot) {
                    // -1, 0, 1 -> 0, 1, 2
                    const int digit = lroundf(x[col + slot*32] * inv) + 1;
                    // each slot owns its own 2-bit field, so OR-ing is the same as adding
                    packed |= (digit & 3) << (2*slot);
                }
                y[ib].qs[base + col] = packed;
            }
            x += 4*32;
        }
    }
}
2200
2201
0
// Quantizes nrow*n_per_row floats from src into TQ1_0 data at dst with the reference
// quantizer. quant_weights is accepted for interface uniformity and ignored.
// Returns nrow times the TQ1_0 row size reported by ggml_row_size().
size_t quantize_tq1_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    (void)quant_weights; // not used

    const size_t  bytes_per_row = ggml_row_size(GGML_TYPE_TQ1_0, n_per_row);
    const int64_t n_elements    = (int64_t)nrow*n_per_row;

    quantize_row_tq1_0_ref(src, dst, n_elements);

    return nrow * bytes_per_row;
}
2207
2208
0
// Quantizes nrow*n_per_row floats from src into TQ2_0 data at dst with the reference
// quantizer. quant_weights is accepted for interface uniformity and ignored.
// Returns nrow times the TQ2_0 row size reported by ggml_row_size().
size_t quantize_tq2_0(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    (void)quant_weights; // not used

    const size_t  bytes_per_row = ggml_row_size(GGML_TYPE_TQ2_0, n_per_row);
    const int64_t n_elements    = (int64_t)nrow*n_per_row;

    quantize_row_tq2_0_ref(src, dst, n_elements);

    return nrow * bytes_per_row;
}
2214
2215
0
// Expands k/QK_K block_tq1_0 entries from x into k floats at y. k must be a multiple of QK_K.
//
// Digit t of a packed byte is recovered by multiplying the byte by 3^t (keeping the low
// 8 bits) and taking the top bits of that value times 3; the digit minus 1 is then
// scaled by the fp16 block scale d. Output order mirrors the quantizer: for each chunk
// of bytes, all first digits, then all second digits, and so on.
void dequantize_row_tq1_0(const block_tq1_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t n_blocks = k / QK_K;

    const uint8_t pow3[6] = {1, 3, 9, 27, 81, 243};

    const size_t qs_size = sizeof(x->qs);
    const size_t qs_wide = qs_size - qs_size % 32; // part of qs handled in 32-byte chunks
    const size_t qh_size = sizeof(x->qh);

    for (int64_t ib = 0; ib < n_blocks; ++ib) {

        const float scale = GGML_FP16_TO_FP32(x[ib].d);

        // 32-byte chunks of qs, 5 digits per byte
        for (size_t base = 0; base < qs_wide; base += 32) {
            for (size_t t = 0; t < 5; ++t) {
                for (size_t col = 0; col < 32; ++col) {
                    const uint8_t shifted = x[ib].qs[base + col] * pow3[t];
                    const int16_t digit = ((uint16_t) shifted * 3) >> 8;
                    *y++ = (float) (digit - 1) * scale;
                }
            }
        }
        // remaining 16-byte chunks of qs, 5 digits per byte
        for (size_t base = qs_wide; base < qs_size; base += 16) {
            for (size_t t = 0; t < 5; ++t) {
                for (size_t col = 0; col < 16; ++col) {
                    const uint8_t shifted = x[ib].qs[base + col] * pow3[t];
                    const int16_t digit = ((uint16_t) shifted * 3) >> 8;
                    *y++ = (float) (digit - 1) * scale;
                }
            }
        }

        // qh, 4 digits per byte
        for (size_t t = 0; t < 4; ++t) {
            for (size_t col = 0; col < qh_size; ++col) {
                const uint8_t shifted = x[ib].qh[col] * pow3[t];
                const int16_t digit = ((uint16_t) shifted * 3) >> 8;
                *y++ = (float) (digit - 1) * scale;
            }
        }
    }
}
2253
2254
0
// Expands k/QK_K block_tq2_0 entries from x into k floats at y. k must be a multiple of QK_K.
//
// Every byte of qs holds four 2-bit fields; each field minus 1 is multiplied by the
// fp16 block scale d. For each 32-byte chunk, the lowest field of all 32 bytes is
// written first, then the next field, and so on.
void dequantize_row_tq2_0(const block_tq2_0 * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t n_blocks = k / QK_K;

    const size_t qs_size = sizeof(x->qs);

    for (int64_t ib = 0; ib < n_blocks; ++ib) {

        const float scale = GGML_FP16_TO_FP32(x[ib].d);

        for (size_t base = 0; base < qs_size; base += 32) {
            for (size_t slot = 0; slot < 4; ++slot) {
                const size_t shift = slot*2;
                for (size_t col = 0; col < 32; ++col) {
                    const int8_t q = (x[ib].qs[base + col] >> shift) & 3;
                    *y++ = (float) (q - 1) * scale;
                }
            }
        }
    }
}
2272
2273
// ====================== "True" 2-bit (de)-quantization
2274
2275
0
// Expands k/QK_K block_iq2_xxs entries from x into k floats at y. k must be a multiple of QK_K.
//
// For every group of 32 values, 8 bytes starting at x[i].qs + 4*ib32 are copied into aux32:
//   - the four bytes of aux32[0] each select an 8-value entry of iq2xxs_grid;
//   - aux32[1] holds four 7-bit indices into ksigns_iq2xs (bits 0..27) and a 4-bit
//     scale in its top bits.
// Each value is db * grid[j], negated when the matching bit of the sign byte is set.
void dequantize_row_iq2_xxs(const block_iq2_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    uint32_t aux32[2];
    const uint8_t * aux8 = (const uint8_t *)aux32; // byte view of aux32

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);

        for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
            memcpy(aux32, x[i].qs + 4*ib32, 2*sizeof(uint32_t));
            const float db = d * (0.5f + (aux32[1] >> 28)) * 0.25f;
            for (int l = 0; l < 4; ++l) {
                const uint8_t * grid = (const uint8_t *)(iq2xxs_grid + aux8[l]);
                const uint8_t  signs = ksigns_iq2xs[(aux32[1] >> 7*l) & 127];
                for (int j = 0; j < 8; ++j) {
                    y[j] = db * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
                }
                y += 8;
            }
        }
    }
}
2300
2301
// ====================== 2.3125 bpw (de)-quantization
2302
2303
0
// Expands k/QK_K block_iq2_xs entries from x into k floats at y. k must be a multiple of QK_K.
//
// For every group of 32 values:
//   - scales[ib32] holds two 4-bit scales: the low nibble applies to the first 16
//     values, the high nibble to the last 16;
//   - each of the four qs entries selects an 8-value entry of iq2xs_grid with its low
//     9 bits and an entry of ksigns_iq2xs with the bits above.
// Each value is db * grid[j], negated when the matching bit of the sign byte is set.
void dequantize_row_iq2_xs(const block_iq2_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    float db[2];

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);

        for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
            db[0] = d * (0.5f + (x[i].scales[ib32] & 0xf)) * 0.25f;
            db[1] = d * (0.5f + (x[i].scales[ib32] >>  4)) * 0.25f;
            for (int l = 0; l < 4; ++l) {
                const uint8_t * grid = (const uint8_t *)(iq2xs_grid + (x[i].qs[4*ib32 + l] & 511));
                const uint8_t  signs = ksigns_iq2xs[x[i].qs[4*ib32 + l] >> 9];
                for (int j = 0; j < 8; ++j) {
                    y[j] = db[l/2] * grid[j] * (signs & kmask_iq2xs[j] ? -1.f : 1.f);
                }
                y += 8;
            }
        }
    }
}
2327
2328
// ====================== 2.5625 bpw (de)-quantization
2329
2330
0
// Expands k/QK_K block_iq2_s entries from x into k floats at y. k must be a multiple of QK_K.
//
// Layout as read here: the first QK_K/8 bytes of qs are grid-index low bytes, the
// sign bytes follow at qs + QK_K/8. For every group of 32 values:
//   - scales[ib32] holds two 4-bit scales (low nibble: first 16 values, high: last 16);
//   - the iq2s_grid index of sub-group l is qs[l] plus two bits taken from qh[ib32]
//     (bits 2l and 2l+1), placed at bit positions 8 and 9;
//   - signs[l] is used directly as the sign mask for the 8 values of sub-group l.
void dequantize_row_iq2_s(const block_iq2_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    float db[2];

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);
        const uint8_t * qs = x[i].qs;
        const uint8_t * qh = x[i].qh;
        const uint8_t * signs = qs + QK_K/8;

        for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
            db[0] = d * (0.5f + (x[i].scales[ib32] & 0xf)) * 0.25f;
            db[1] = d * (0.5f + (x[i].scales[ib32] >>  4)) * 0.25f;
            for (int l = 0; l < 4; ++l) {
                const float dl = db[l/2];
                // `<<` binds tighter than `&`: the shifted qh byte is masked with 0x300
                const uint8_t * grid = (const uint8_t *)(iq2s_grid + (qs[l] | (qh[ib32] << (8-2*l) & 0x300)));
                for (int j = 0; j < 8; ++j) {
                    y[j] = dl * grid[j] * (signs[l] & kmask_iq2xs[j] ? -1.f : 1.f);
                }
                y += 8;
            }
            qs += 4;
            signs += 4;
        }
    }
}
2359
2360
// ====================== 3.0625 bpw (de)-quantization
2361
2362
0
// Expands k/QK_K block_iq3_xxs entries from x into k floats at y. k must be a multiple of QK_K.
//
// Layout as read here: the first QK_K/4 bytes of qs are grid indices, followed by one
// 32-bit word per group of 32 values holding four 7-bit indices into ksigns_iq2xs
// (bits 0..27) and a 4-bit scale in the top bits. Each sub-group of 8 values uses two
// iq3xxs_grid entries of 4 values each and one sign byte.
void dequantize_row_iq3_xxs(const block_iq3_xxs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    uint32_t aux32;

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);
        const uint8_t * qs = x[i].qs;
        const uint8_t * scales_and_signs = qs + QK_K/4;

        for (int ib32 = 0; ib32 < QK_K/32; ++ib32) {
            // memcpy: the word is read from a byte array
            memcpy(&aux32, scales_and_signs + 4*ib32, sizeof(uint32_t));
            const float db = d * (0.5f + (aux32 >> 28)) * 0.5f;
            for (int l = 0; l < 4; ++l) {
                const uint8_t  signs = ksigns_iq2xs[(aux32 >> 7*l) & 127];
                const uint8_t * grid1 = (const uint8_t *)(iq3xxs_grid + qs[2*l+0]);
                const uint8_t * grid2 = (const uint8_t *)(iq3xxs_grid + qs[2*l+1]);
                for (int j = 0; j < 4; ++j) {
                    y[j+0] = db * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
                    y[j+4] = db * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
                }
                y += 8;
            }
            qs += 8;
        }
    }
}
2391
2392
// ====================== 3.3125 bpw (de)-quantization
2393
2394
0
// Expands k/QK_K block_iq3_s entries from x into k floats at y. k must be a multiple of QK_K.
//
// Two groups of 32 values are decoded per outer iteration, sharing one scales byte:
// the low nibble gives db1 for the first group, the high nibble db2 for the second.
// Every iq3s_grid index is a byte of qs plus one extra bit from qh (qh[0] for the
// first group, qh[1] for the second) placed at bit position 8. signs[l] is used
// directly as the sign mask for the 8 values of sub-group l.
void dequantize_row_iq3_s(const block_iq3_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // the counter is int64_t to match nb; an int counter would overflow for very large k
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);
        const uint8_t * qs = x[i].qs;
        const uint8_t * qh = x[i].qh;
        const uint8_t * signs = x[i].signs;

        for (int ib32 = 0; ib32 < QK_K/32; ib32 += 2) {
            const float db1 = d * (1 + 2*(x[i].scales[ib32/2] & 0xf));
            const float db2 = d * (1 + 2*(x[i].scales[ib32/2] >>  4));
            // first group of 32: high index bits come from qh[0] (bit 2l, then bit 2l+1)
            for (int l = 0; l < 4; ++l) {
                const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*l+0] | ((qh[0] << (8-2*l)) & 256)));
                const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2*l+1] | ((qh[0] << (7-2*l)) & 256)));
                for (int j = 0; j < 4; ++j) {
                    y[j+0] = db1 * grid1[j] * (signs[l] & kmask_iq2xs[j+0] ? -1.f : 1.f);
                    y[j+4] = db1 * grid2[j] * (signs[l] & kmask_iq2xs[j+4] ? -1.f : 1.f);
                }
                y += 8;
            }
            qs += 8;
            signs += 4;
            // second group of 32: high index bits come from qh[1]
            for (int l = 0; l < 4; ++l) {
                const uint8_t * grid1 = (const uint8_t *)(iq3s_grid + (qs[2*l+0] | ((qh[1] << (8-2*l)) & 256)));
                const uint8_t * grid2 = (const uint8_t *)(iq3s_grid + (qs[2*l+1] | ((qh[1] << (7-2*l)) & 256)));
                for (int j = 0; j < 4; ++j) {
                    y[j+0] = db2 * grid1[j] * (signs[l] & kmask_iq2xs[j+0] ? -1.f : 1.f);
                    y[j+4] = db2 * grid2[j] * (signs[l] & kmask_iq2xs[j+4] ? -1.f : 1.f);
                }
                y += 8;
            }
            qh += 2;
            qs += 8;
            signs += 4;
        }
    }
}
2434
2435
// ====================== 1.5625 bpw (de)-quantization
2436
2437
0
// Dequantize k values stored as IQ1_S blocks in x into floats in y.
//
// k must be a multiple of QK_K (asserted); k / QK_K blocks are read and
// QK_K floats are written per block.
//
// For every 32-value sub-block ib, the 16-bit word qh[ib] is decoded as:
//   bits  0..11 : four 3-bit fields, the high part of the grid index for each
//                 group of 8 values (the low 8 bits come from qs[l])
//   bits 12..14 : 3-bit scale s, giving the sub-block scale d * (2*s + 1)
//   bit  15     : selects -IQ1S_DELTA instead of +IQ1S_DELTA as the offset
void dequantize_row_iq1_s(const block_iq1_s * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {

        const float d = GGML_FP16_TO_FP32(x[i].d);
        const uint8_t  * qs = x[i].qs;
        const uint16_t * qh = x[i].qh;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const float dl = d * (2*((qh[ib] >> 12) & 7) + 1);
            const float delta = qh[ib] & 0x8000 ? -IQ1S_DELTA : IQ1S_DELTA;
            for (int l = 0; l < 4; ++l) {
                // 11-bit grid index: qs[l] supplies the low 8 bits, qh the next 3.
                // The selected grid entry is read as 8 signed bytes.
                const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
                for (int j = 0; j < 8; ++j) {
                    y[j] = dl * (grid[j] + delta);
                }
                y += 8;
            }
            qs += 4;
        }
    }
}
2461
2462
0
// Dequantize k values stored as IQ1_M blocks in x into floats in y.
//
// k must be a multiple of QK_K (asserted); k / QK_K blocks are read and
// QK_K floats are written per block.
//
// The scales bytes of a block are interpreted as four 16-bit words sc[0..3]:
//   - the top 4 bits of each word are concatenated (sc[0] lowest) into the
//     16-bit pattern of the fp16 block scale d
//   - the low 12 bits of each word hold four 3-bit scales, two per 32-value
//     sub-block (one for each half of 16 values)
// Each qh byte serves two groups of 8 values: per nibble, the low 3 bits are
// the high part of the grid index and the top bit selects -IQ1S_DELTA.
void dequantize_row_iq1_m(const block_iq1_m * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    float delta[4];
    uint16_t idx[4];
    uint16_t sc[4];

    iq1m_scale_t scale;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {

        // Copy the scale bytes instead of casting the byte array to uint16_t *:
        // this reads the same 8 bytes in the same order, without relying on
        // the array's alignment or violating strict aliasing.
        memcpy(sc, x[i].scales, sizeof(sc));
        scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
        const float d = GGML_FP16_TO_FP32(scale.f16);

        const uint8_t * qs = x[i].qs;
        const uint8_t * qh = x[i].qh;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            // Two 3-bit scales per sub-block, 6 bits per sub-block inside sc[ib/2].
            const float dl1 = d * (2*((sc[ib/2] >> (6*(ib%2)+0)) & 0x7) + 1);
            const float dl2 = d * (2*((sc[ib/2] >> (6*(ib%2)+3)) & 0x7) + 1);

            // 11-bit grid indices: low 8 bits from qs, high 3 bits from a qh nibble.
            idx[0] = qs[0] | ((qh[0] << 8) & 0x700);
            idx[1] = qs[1] | ((qh[0] << 4) & 0x700);
            idx[2] = qs[2] | ((qh[1] << 8) & 0x700);
            idx[3] = qs[3] | ((qh[1] << 4) & 0x700);
            // The top bit of each qh nibble picks the sign of the offset.
            delta[0] = qh[0] & 0x08 ? -IQ1S_DELTA : IQ1S_DELTA;
            delta[1] = qh[0] & 0x80 ? -IQ1S_DELTA : IQ1S_DELTA;
            delta[2] = qh[1] & 0x08 ? -IQ1S_DELTA : IQ1S_DELTA;
            delta[3] = qh[1] & 0x80 ? -IQ1S_DELTA : IQ1S_DELTA;
            // First 16 values use dl1.
            for (int l = 0; l < 2; ++l) {
                const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]);
                for (int j = 0; j < 8; ++j) {
                    y[j] = dl1 * (grid[j] + delta[l]);
                }
                y += 8;
            }
            // Last 16 values use dl2.
            for (int l = 2; l < 4; ++l) {
                const int8_t * grid = (const int8_t *)(iq1s_grid + idx[l]);
                for (int j = 0; j < 8; ++j) {
                    y[j] = dl2 * (grid[j] + delta[l]);
                }
                y += 8;
            }
            qs += 4;
            qh += 2;
        }
    }
}
2511
2512
0
// Dequantize k values stored as IQ4_NL blocks in x into floats in y.
//
// k must be a multiple of QK4_NL (asserted); k / QK4_NL blocks are read and
// QK4_NL floats are written per block. Each byte of qs holds two 4-bit
// indices into kvalues_iq4nl: the low nibble produces output j, the high
// nibble produces output j + QK4_NL/2. Every looked-up value is scaled by the
// block's fp16 scale d.
void dequantize_row_iq4_nl(const block_iq4_nl * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK4_NL == 0);
    const int64_t nb = k / QK4_NL;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {

        // qs is re-derived from the block on every iteration, so it needs no
        // advancing at the end of the loop body.
        const uint8_t * qs = x[i].qs;

        const float d = GGML_FP16_TO_FP32(x[i].d);
        for (int j = 0; j < QK4_NL/2; ++j) {
            y[j+       0] = d * kvalues_iq4nl[qs[j] & 0xf];
            y[j+QK4_NL/2] = d * kvalues_iq4nl[qs[j] >>  4];
        }
        y += QK4_NL;
    }
}
2529
2530
0
// Dequantize k values stored as IQ4_XS blocks in x into floats in y.
//
// k must be a multiple of QK_K (asserted); k / QK_K blocks are read and
// QK_K floats are written per block.
//
// Each 32-value sub-block ib has a 6-bit scale ls: the low 4 bits come from a
// nibble of scales_l[ib/2], the high 2 bits from bits 2*ib..2*ib+1 of
// scales_h. The effective scale is d * (ls - 32). Within a sub-block, the low
// nibbles of 16 qs bytes give outputs 0..15 and the high nibbles give outputs
// 16..31, each an index into kvalues_iq4nl.
void dequantize_row_iq4_xs(const block_iq4_xs * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {

        const uint8_t * qs = x[i].qs;

        const float d = GGML_FP16_TO_FP32(x[i].d);

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const int ls = ((x[i].scales_l[ib/2] >> 4*(ib%2)) & 0xf) | (((x[i].scales_h >> 2*ib) & 3) << 4);
            const float dl = d * (ls - 32);
            for (int j = 0; j < 16; ++j) {
                y[j+ 0] = dl * kvalues_iq4nl[qs[j] & 0xf];
                y[j+16] = dl * kvalues_iq4nl[qs[j] >>  4];
            }
            y  += 32;
            qs += 16;
        }
    }
}
2552
2553
//===================================== Q8_K ==============================================
2554
2555
0
// Reference quantization of k floats in x into Q8_K blocks in y.
//
// k must be a multiple of QK_K (asserted). For each block of QK_K inputs:
//   - the value with the largest magnitude (max) is found
//   - if the block has no value of non-zero magnitude, d, qs and bsums are
//     all set to zero
//   - otherwise every value is multiplied by iscale = -127/max, rounded with
//     nearest_int() and clamped to at most 127; d stores 1/iscale
//   - bsums[j] holds the sum of the 16 quantized values of group j
void quantize_row_q8_K_ref(const float * GGML_RESTRICT x, block_q8_K * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {

        float max = 0;
        float amax = 0;
        for (int j = 0; j < QK_K; ++j) {
            float ax = fabsf(x[j]);
            if (ax > amax) {
                amax = ax; max = x[j];
            }
        }
        if (!amax) {
            y[i].d = 0;
            memset(y[i].qs, 0, QK_K);
            // Also clear the group sums so that every field of the block is
            // written on this path and the output is fully determined.
            for (int j = 0; j < QK_K/16; ++j) {
                y[i].bsums[j] = 0;
            }
            x += QK_K;
            continue;
        }
        //const float iscale = -128.f/max;
        // We need this change for IQ2_XXS, else the AVX implementation becomes very awkward
        const float iscale = -127.f/max;
        for (int j = 0; j < QK_K; ++j) {
            int v = nearest_int(iscale*x[j]);
            y[i].qs[j] = MIN(127, v);
        }
        for (int j = 0; j < QK_K/16; ++j) {
            int sum = 0;
            for (int ii = 0; ii < 16; ++ii) {
                sum += y[i].qs[j*16 + ii];
            }
            y[i].bsums[j] = sum;
        }
        y[i].d = 1/iscale;
        x += QK_K;
    }
}
2593
2594
0
// Dequantize k values stored as Q8_K blocks in x into floats in y.
//
// k must be a multiple of QK_K (asserted). Each output is the block scale d
// multiplied by the corresponding quantized value qs[j].
void dequantize_row_q8_K(const block_q8_K * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const int64_t nb = k / QK_K;

    // The block counter has the same width as nb so it cannot overflow.
    for (int64_t i = 0; i < nb; i++) {
        for (int j = 0; j < QK_K; ++j) {
            *y++ = x[i].d * x[i].qs[j];
        }
    }
}
2604
2605
// ================================ IQ2 quantization =============================================
2606
2607
// One set of lookup tables for a quantization grid.
// All three pointers are NULL until the entry has been set up;
// iq2xs_init_impl() treats a non-NULL grid as "already initialised".
typedef struct {
    uint64_t *grid;        // grid table
    int      *map;         // map table
    uint16_t *neighbours;  // neighbours table
} iq2_entry_t;
2612
2613
static iq2_entry_t iq2_data[4] = {
2614
    {NULL, NULL, NULL},
2615
    {NULL, NULL, NULL},
2616
    {NULL, NULL, NULL},
2617
    {NULL, NULL, NULL},
2618
};
2619
2620
2.61k
static inline int iq2_data_index(enum ggml_type type) {
2621
2.61k
    GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
2622
2.61k
    return type == GGML_TYPE_IQ2_XXS ? 0 :
2623
2.61k
           type == GGML_TYPE_IQ2_XS  ? 1 :
2624
1.74k
           type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? 2 : 3;
2625
2.61k
}
2626
2627
0
static inline int iq2_grid_size(enum ggml_type type) {
2628
0
    GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
2629
0
    return type == GGML_TYPE_IQ2_XXS ? 256 :
2630
0
           type == GGML_TYPE_IQ2_XS  ? 512 :
2631
0
           type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? NGRID_IQ1S : 1024;
2632
0
}
2633
2634
0
// Comparison callback over pairs of ints: orders by the first int,
// then by the second. Returns -1, 0 or 1.
static int iq2_compare_func(const void * left, const void * right) {
    const int * a = (const int *)left;
    const int * b = (const int *)right;

    // The first element decides unless it is equal on both sides.
    if (a[0] != b[0]) {
        return a[0] < b[0] ? -1 : 1;
    }
    // Tie on the first element: fall back to the second.
    if (a[1] != b[1]) {
        return a[1] < b[1] ? -1 : 1;
    }
    return 0;
}
2639
2640
0
void iq2xs_init_impl(enum ggml_type type) {
2641
0
    const int gindex = iq2_data_index(type);
2642
0
    const int grid_size = iq2_grid_size(type);
2643
0
    if (iq2_data[gindex].grid) {
2644
0
        return;
2645
0
    }
2646
0
    static const uint16_t kgrid_2bit_256[256] = {
2647
0
            0,     2,     5,     8,    10,    17,    20,    32,    34,    40,    42,    65,    68,    80,    88,    97,
2648
0
          100,   128,   130,   138,   162,   257,   260,   272,   277,   320,   388,   408,   512,   514,   546,   642,
2649
0
         1025,  1028,  1040,  1057,  1060,  1088,  1090,  1096,  1120,  1153,  1156,  1168,  1188,  1280,  1282,  1288,
2650
0
         1312,  1350,  1385,  1408,  1425,  1545,  1552,  1600,  1668,  1700,  2048,  2053,  2056,  2068,  2088,  2113,
2651
0
         2116,  2128,  2130,  2184,  2308,  2368,  2562,  2580,  4097,  4100,  4112,  4129,  4160,  4192,  4228,  4240,
2652
0
         4245,  4352,  4360,  4384,  4432,  4442,  4480,  4644,  4677,  5120,  5128,  5152,  5157,  5193,  5248,  5400,
2653
0
         5474,  5632,  5654,  6145,  6148,  6160,  6208,  6273,  6400,  6405,  6560,  6737,  8192,  8194,  8202,  8260,
2654
0
         8289,  8320,  8322,  8489,  8520,  8704,  8706,  9217,  9220,  9232,  9280,  9302,  9472,  9537,  9572,  9872,
2655
0
        10248, 10272, 10388, 10820, 16385, 16388, 16400, 16408, 16417, 16420, 16448, 16456, 16470, 16480, 16513, 16516,
2656
0
        16528, 16640, 16672, 16737, 16768, 16773, 16897, 16912, 16968, 16982, 17000, 17408, 17416, 17440, 17536, 17561,
2657
0
        17682, 17700, 17920, 18433, 18436, 18448, 18496, 18501, 18688, 18776, 18785, 18818, 19013, 19088, 20480, 20488,
2658
0
        20497, 20505, 20512, 20608, 20616, 20740, 20802, 20900, 21137, 21648, 21650, 21770, 22017, 22100, 22528, 22545,
2659
0
        22553, 22628, 22848, 23048, 24580, 24592, 24640, 24680, 24832, 24917, 25112, 25184, 25600, 25605, 25872, 25874,
2660
0
        25988, 26690, 32768, 32770, 32778, 32833, 32898, 33028, 33048, 33088, 33297, 33793, 33796, 33808, 33813, 33856,
2661
0
        33888, 34048, 34118, 34196, 34313, 34368, 34400, 34818, 35076, 35345, 36868, 36880, 36900, 36928, 37025, 37142,
2662
0
        37248, 37445, 37888, 37922, 37956, 38225, 39041, 39200, 40962, 41040, 41093, 41225, 41472, 42008, 43088, 43268,
2663
0
    };
2664
0
    static const uint16_t kgrid_2bit_512[512] = {
2665
0
            0,     2,     5,     8,    10,    17,    20,    22,    25,    32,    34,    37,    40,    65,    68,    70,
2666
0
           73,    80,    82,    85,    88,    97,   100,   128,   130,   133,   136,   145,   148,   153,   160,   257,
2667
0
          260,   262,   265,   272,   274,   277,   280,   282,   289,   292,   320,   322,   325,   328,   337,   340,
2668
0
          352,   360,   385,   388,   400,   512,   514,   517,   520,   529,   532,   544,   577,   580,   592,   597,
2669
0
          640,   650,  1025,  1028,  1030,  1033,  1040,  1042,  1045,  1048,  1057,  1060,  1088,  1090,  1093,  1096,
2670
0
         1105,  1108,  1110,  1120,  1153,  1156,  1168,  1280,  1282,  1285,  1288,  1297,  1300,  1312,  1345,  1348,
2671
0
         1360,  1377,  1408,  1537,  1540,  1552,  1574,  1600,  1602,  1668,  2048,  2050,  2053,  2056,  2058,  2065,
2672
0
         2068,  2080,  2085,  2113,  2116,  2128,  2136,  2176,  2208,  2218,  2305,  2308,  2320,  2368,  2433,  2441,
2673
0
         2560,  2592,  2600,  2710,  2720,  4097,  4100,  4102,  4105,  4112,  4114,  4117,  4120,  4129,  4132,  4160,
2674
0
         4162,  4165,  4168,  4177,  4180,  4192,  4202,  4225,  4228,  4240,  4352,  4354,  4357,  4360,  4369,  4372,
2675
0
         4384,  4417,  4420,  4432,  4480,  4500,  4502,  4609,  4612,  4614,  4624,  4672,  4704,  5120,  5122,  5125,
2676
0
         5128,  5137,  5140,  5152,  5185,  5188,  5193,  5200,  5220,  5248,  5377,  5380,  5392,  5440,  5632,  5652,
2677
0
         5705,  6145,  6148,  6160,  6162,  6208,  6228,  6278,  6400,  6405,  6502,  6737,  6825,  8192,  8194,  8197,
2678
0
         8200,  8202,  8209,  8212,  8224,  8257,  8260,  8272,  8320,  8352,  8449,  8452,  8464,  8512,  8520,  8549,
2679
0
         8704,  8738,  8832,  8872,  9217,  9220,  9232,  9257,  9280,  9472,  9537,  9554,  9625,  9729,  9754,  9894,
2680
0
        10240, 10248, 10250, 10272, 10325, 10376, 10402, 10600, 10640, 10760, 10784, 10882, 10888, 10890, 16385, 16388,
2681
0
        16390, 16393, 16400, 16402, 16405, 16408, 16417, 16420, 16448, 16450, 16453, 16456, 16458, 16465, 16468, 16480,
2682
0
        16485, 16513, 16516, 16528, 16640, 16642, 16645, 16648, 16657, 16660, 16672, 16705, 16708, 16720, 16768, 16773,
2683
0
        16802, 16897, 16900, 16912, 16914, 16937, 16960, 17408, 17410, 17413, 17416, 17425, 17428, 17433, 17440, 17473,
2684
0
        17476, 17488, 17536, 17556, 17665, 17668, 17680, 17700, 17728, 17818, 17920, 17930, 17988, 18000, 18433, 18436,
2685
0
        18448, 18496, 18501, 18516, 18530, 18688, 18705, 18756, 18768, 18793, 18948, 20480, 20482, 20485, 20488, 20497,
2686
0
        20500, 20512, 20520, 20545, 20548, 20560, 20608, 20737, 20740, 20752, 20757, 20800, 20802, 20992, 21060, 21162,
2687
0
        21505, 21508, 21520, 21537, 21568, 21600, 21633, 21665, 21760, 21768, 21888, 21896, 22049, 22120, 22177, 22528,
2688
0
        22548, 22593, 22608, 22681, 22810, 22848, 22850, 23173, 24577, 24580, 24592, 24640, 24660, 24674, 24710, 24745,
2689
0
        24832, 25124, 25162, 25234, 25600, 25622, 25872, 25920, 25925, 26020, 26625, 26730, 26917, 27142, 27220, 27234,
2690
0
        32768, 32770, 32773, 32776, 32785, 32788, 32800, 32810, 32833, 32836, 32848, 32896, 32898, 32936, 32938, 33025,
2691
0
        33028, 33030, 33040, 33088, 33105, 33113, 33280, 33312, 33408, 33410, 33440, 33448, 33793, 33796, 33808, 33810,
2692
0
        33813, 33856, 33888, 33929, 34048, 34116, 34213, 34328, 34410, 34816, 34824, 34853, 34906, 34944, 34946, 34984,
2693
0
        35078, 35362, 35456, 35464, 35478, 35496, 36865, 36868, 36880, 36928, 36950, 36996, 37120, 37154, 37220, 37462,
2694
0
        37513, 37888, 37893, 37956, 37968, 37976, 38185, 38288, 38290, 38465, 38993, 39078, 39241, 39445, 39520, 40960,
2695
0
        40962, 40968, 40970, 40992, 41002, 41120, 41297, 41305, 41382, 41472, 41474, 41480, 41514, 41600, 41632, 42048,
2696
0
        42133, 42597, 42648, 43018, 43040, 43042, 43048, 43168, 43176, 43268, 43396, 43398, 43560, 43562, 43665, 43690,
2697
0
    };
2698
0
    static const uint16_t kgrid_1bit_2048[NGRID_IQ1S] = {
2699
0
            0,     2,     5,     8,    10,    17,    21,    32,    34,    40,    42,    69,    81,    84,    86,   101,
2700
0
          128,   130,   136,   138,   149,   160,   162,   168,   170,   260,   261,   273,   276,   278,   281,   282,
2701
0
          293,   321,   326,   329,   338,   341,   346,   353,   356,   358,   360,   389,   401,   404,   406,   421,
2702
0
          512,   514,   520,   522,   533,   544,   546,   552,   554,   581,   593,   601,   612,   617,   640,   642,
2703
0
          648,   650,   657,   661,   665,   672,   674,   680,   682,  1041,  1044,  1046,  1061,  1089,  1097,  1109,
2704
0
         1114,  1124,  1125,  1169,  1177,  1189,  1281,  1284,  1285,  1286,  1301,  1304,  1306,  1321,  1344,  1349,
2705
0
         1354,  1360,  1361,  1364,  1365,  1366,  1369,  1376,  1378,  1381,  1384,  1386,  1409,  1425,  1429,  1432,
2706
0
         1434,  1441,  1444,  1445,  1446,  1449,  1556,  1561,  1601,  1604,  1616,  1618,  1621,  1624,  1632,  1633,
2707
0
         1638,  1641,  1669,  1681,  1684,  1689,  2048,  2050,  2056,  2058,  2069,  2080,  2082,  2088,  2090,  2117,
2708
0
         2129,  2134,  2149,  2176,  2178,  2184,  2186,  2197,  2208,  2210,  2216,  2218,  2309,  2321,  2324,  2329,
2709
0
         2340,  2341,  2369,  2384,  2385,  2389,  2401,  2404,  2409,  2449,  2452,  2454,  2457,  2469,  2560,  2562,
2710
0
         2568,  2570,  2581,  2592,  2594,  2600,  2602,  2629,  2641,  2649,  2657,  2661,  2688,  2690,  2693,  2696,
2711
0
         2698,  2709,  2720,  2722,  2728,  2730,  4112,  4113,  4116,  4121,  4132,  4133,  4161,  4164,  4176,  4181,
2712
0
         4184,  4193,  4196,  4197,  4201,  4241,  4244,  4246,  4257,  4261,  4353,  4356,  4358,  4361,  4368,  4370,
2713
0
         4373,  4376,  4385,  4388,  4393,  4421,  4426,  4432,  4433,  4434,  4436,  4437,  4438,  4441,  4448,  4453,
2714
0
         4484,  4498,  4501,  4513,  4516,  4625,  4628,  4630,  4645,  4672,  4678,  4681,  4690,  4693,  4696,  4698,
2715
0
         4708,  4710,  4741,  4753,  4756,  4758,  4773,  5121,  5126,  5129,  5140,  5141,  5144,  5145,  5153,  5158,
2716
0
         5185,  5189,  5190,  5192,  5194,  5201,  5204,  5205,  5206,  5209,  5218,  5221,  5224,  5252,  5257,  5264,
2717
0
         5268,  5269,  5272,  5273,  5274,  5281,  5284,  5285,  5289,  5378,  5381,  5386,  5393,  5396,  5397,  5398,
2718
0
         5401,  5408,  5410,  5413,  5416,  5418,  5441,  5444,  5445,  5446,  5457,  5458,  5460,  5461,  5462,  5465,
2719
0
         5466,  5473,  5476,  5477,  5478,  5481,  5504,  5506,  5508,  5509,  5512,  5514,  5520,  5521,  5524,  5525,
2720
0
         5526,  5529,  5530,  5536,  5538,  5541,  5633,  5636,  5637,  5638,  5653,  5654,  5656,  5658,  5665,  5670,
2721
0
         5696,  5698,  5700,  5701,  5704,  5706,  5713,  5717,  5718,  5720,  5721,  5729,  5732,  5733,  5736,  5737,
2722
0
         5738,  5766,  5770,  5778,  5781,  5796,  5801,  6161,  6166,  6181,  6209,  6212,  6214,  6217,  6224,  6229,
2723
0
         6232,  6234,  6240,  6241,  6244,  6246,  6249,  6277,  6289,  6292,  6309,  6416,  6418,  6421,  6426,  6433,
2724
0
         6437,  6466,  6468,  6469,  6472,  6481,  6484,  6485,  6486,  6489,  6490,  6496,  6501,  6506,  6537,  6545,
2725
0
         6546,  6549,  6552,  6561,  6566,  6569,  6665,  6678,  6692,  6694,  6724,  6726,  6729,  6736,  6738,  6741,
2726
0
         6744,  6753,  6758,  6761,  6789,  6801,  6806,  6810,  8192,  8194,  8200,  8202,  8213,  8224,  8226,  8229,
2727
0
         8232,  8234,  8261,  8273,  8281,  8289,  8293,  8320,  8322,  8328,  8330,  8341,  8352,  8354,  8357,  8360,
2728
0
         8362,  8453,  8465,  8468,  8473,  8485,  8514,  8516,  8521,  8533,  8536,  8538,  8545,  8548,  8549,  8550,
2729
0
         8581,  8592,  8598,  8601,  8613,  8705,  8712,  8714,  8721,  8725,  8736,  8738,  8744,  8746,  8773,  8785,
2730
0
         8790,  8793,  8805,  8833,  8840,  8842,  8849,  8853,  8864,  8866,  8872,  8874,  9221,  9236,  9238,  9241,
2731
0
         9253,  9284,  9285,  9286,  9289,  9298,  9301,  9304,  9306,  9318,  9349,  9361,  9364,  9369,  9377,  9381,
2732
0
         9481,  9493,  9505,  9513,  9536,  9541,  9544,  9553,  9556,  9557,  9561,  9570,  9573,  9576,  9609,  9616,
2733
0
         9620,  9621,  9624,  9626,  9633,  9636,  9638,  9641,  9733,  9744,  9746,  9753,  9765,  9793,  9801,  9813,
2734
0
         9824,  9825,  9833,  9860,  9862,  9872,  9882, 10240, 10242, 10248, 10250, 10261, 10272, 10274, 10280, 10282,
2735
0
        10309, 10321, 10324, 10341, 10368, 10370, 10376, 10378, 10400, 10402, 10408, 10410, 10505, 10513, 10516, 10521,
2736
0
        10533, 10566, 10569, 10578, 10581, 10593, 10596, 10598, 10601, 10629, 10640, 10646, 10649, 10660, 10661, 10752,
2737
0
        10754, 10760, 10762, 10784, 10786, 10792, 10794, 10821, 10833, 10838, 10841, 10853, 10880, 10882, 10888, 10890,
2738
0
        10901, 10912, 10914, 10920, 10922, 16389, 16401, 16406, 16421, 16457, 16466, 16469, 16472, 16474, 16481, 16484,
2739
0
        16486, 16532, 16537, 16545, 16550, 16640, 16641, 16644, 16646, 16649, 16658, 16661, 16662, 16664, 16666, 16673,
2740
0
        16678, 16681, 16709, 16712, 16714, 16721, 16724, 16725, 16726, 16729, 16730, 16741, 16744, 16746, 16769, 16772,
2741
0
        16774, 16784, 16786, 16789, 16800, 16801, 16802, 16901, 16913, 16916, 16918, 16933, 16961, 16978, 16981, 16986,
2742
0
        16996, 17001, 17033, 17044, 17061, 17409, 17429, 17433, 17449, 17477, 17480, 17482, 17489, 17492, 17493, 17494,
2743
0
        17505, 17506, 17509, 17512, 17514, 17537, 17542, 17545, 17552, 17554, 17557, 17568, 17569, 17577, 17665, 17666,
2744
0
        17669, 17674, 17681, 17684, 17685, 17686, 17689, 17696, 17701, 17706, 17729, 17732, 17733, 17734, 17737, 17744,
2745
0
        17745, 17748, 17749, 17750, 17752, 17753, 17761, 17764, 17765, 17766, 17769, 17794, 17796, 17797, 17800, 17809,
2746
0
        17812, 17813, 17814, 17817, 17818, 17829, 17832, 17834, 17921, 17925, 17929, 17940, 17941, 17944, 17946, 17953,
2747
0
        17956, 17961, 17984, 17986, 17989, 17992, 18000, 18001, 18002, 18005, 18006, 18009, 18018, 18021, 18024, 18049,
2748
0
        18053, 18058, 18068, 18069, 18081, 18084, 18086, 18437, 18449, 18453, 18458, 18469, 18498, 18505, 18512, 18517,
2749
0
        18520, 18529, 18532, 18534, 18537, 18565, 18577, 18580, 18582, 18585, 18597, 18689, 18693, 18694, 18698, 18704,
2750
0
        18708, 18709, 18712, 18721, 18724, 18726, 18752, 18757, 18762, 18769, 18770, 18772, 18773, 18774, 18777, 18784,
2751
0
        18786, 18789, 18790, 18794, 18822, 18825, 18834, 18837, 18838, 18840, 18849, 18852, 18854, 18857, 18966, 19012,
2752
0
        19014, 19017, 19029, 19032, 19034, 19044, 19049, 19092, 19109, 20481, 20484, 20485, 20486, 20489, 20498, 20501,
2753
0
        20506, 20513, 20516, 20521, 20544, 20549, 20552, 20561, 20564, 20565, 20566, 20569, 20581, 20584, 20614, 20617,
2754
0
        20629, 20632, 20640, 20641, 20646, 20649, 20741, 20744, 20745, 20746, 20753, 20756, 20757, 20758, 20760, 20761,
2755
0
        20768, 20773, 20774, 20776, 20778, 20801, 20804, 20805, 20806, 20809, 20816, 20817, 20818, 20820, 20821, 20822,
2756
0
        20824, 20825, 20826, 20833, 20836, 20837, 20838, 20841, 20866, 20869, 20881, 20884, 20885, 20886, 20889, 20896,
2757
0
        20901, 20906, 20993, 20998, 21010, 21013, 21018, 21025, 21028, 21058, 21061, 21066, 21073, 21076, 21077, 21078,
2758
0
        21081, 21090, 21093, 21125, 21136, 21138, 21141, 21145, 21146, 21156, 21508, 21509, 21521, 21524, 21525, 21526,
2759
0
        21528, 21529, 21537, 21541, 21544, 21546, 21569, 21572, 21573, 21574, 21577, 21578, 21584, 21585, 21588, 21589,
2760
0
        21590, 21592, 21593, 21594, 21601, 21602, 21604, 21605, 21606, 21609, 21632, 21640, 21642, 21649, 21652, 21653,
2761
0
        21654, 21657, 21665, 21668, 21669, 21674, 21761, 21762, 21764, 21765, 21766, 21769, 21776, 21777, 21778, 21780,
2762
0
        21781, 21782, 21785, 21786, 21793, 21796, 21797, 21798, 21801, 21824, 21825, 21826, 21828, 21829, 21830, 21832,
2763
0
        21833, 21840, 21841, 21842, 21844, 21845, 21846, 21848, 21849, 21850, 21856, 21857, 21860, 21861, 21862, 21864,
2764
0
        21865, 21866, 21889, 21892, 21893, 21897, 21898, 21904, 21905, 21908, 21909, 21910, 21912, 21913, 21921, 21924,
2765
0
        21925, 21926, 21929, 22016, 22017, 22018, 22020, 22022, 22024, 22025, 22033, 22036, 22037, 22040, 22041, 22048,
2766
0
        22049, 22050, 22052, 22053, 22054, 22056, 22057, 22081, 22085, 22086, 22088, 22089, 22090, 22096, 22097, 22098,
2767
0
        22100, 22101, 22102, 22104, 22105, 22106, 22113, 22116, 22117, 22121, 22146, 22149, 22150, 22152, 22153, 22154,
2768
0
        22161, 22165, 22170, 22178, 22181, 22182, 22184, 22185, 22532, 22533, 22534, 22537, 22544, 22549, 22552, 22561,
2769
0
        22570, 22597, 22600, 22602, 22609, 22612, 22613, 22614, 22616, 22617, 22624, 22626, 22628, 22629, 22658, 22665,
2770
0
        22672, 22674, 22677, 22680, 22689, 22697, 22785, 22786, 22789, 22794, 22801, 22804, 22805, 22806, 22809, 22821,
2771
0
        22849, 22852, 22853, 22854, 22857, 22864, 22865, 22866, 22868, 22869, 22870, 22872, 22873, 22874, 22881, 22884,
2772
0
        22885, 22886, 22889, 22913, 22917, 22921, 22929, 22932, 22933, 22934, 22936, 22937, 22949, 23044, 23048, 23061,
2773
0
        23066, 23072, 23077, 23078, 23081, 23109, 23112, 23113, 23121, 23125, 23126, 23128, 23129, 23138, 23141, 23144,
2774
0
        23146, 23169, 23178, 23186, 23189, 23190, 23192, 23194, 23201, 24581, 24596, 24598, 24601, 24613, 24644, 24656,
2775
0
        24661, 24662, 24664, 24666, 24673, 24676, 24678, 24681, 24705, 24726, 24741, 24833, 24836, 24838, 24841, 24850,
2776
0
        24853, 24865, 24866, 24870, 24873, 24901, 24905, 24913, 24917, 24918, 24921, 24933, 24934, 24938, 24964, 24970,
2777
0
        24978, 24981, 24993, 24998, 25001, 25105, 25110, 25113, 25152, 25153, 25158, 25173, 25174, 25176, 25184, 25221,
2778
0
        25233, 25238, 25253, 25617, 25618, 25621, 25622, 25626, 25633, 25638, 25641, 25664, 25666, 25669, 25672, 25674,
2779
0
        25681, 25684, 25685, 25686, 25689, 25690, 25696, 25698, 25701, 25732, 25733, 25737, 25744, 25746, 25748, 25749,
2780
0
        25750, 25752, 25754, 25761, 25764, 25769, 25861, 25864, 25866, 25873, 25877, 25878, 25881, 25924, 25925, 25926,
2781
0
        25929, 25936, 25937, 25940, 25941, 25942, 25945, 25953, 25956, 25957, 25958, 25961, 25990, 25993, 25994, 26001,
2782
0
        26005, 26006, 26009, 26010, 26018, 26021, 26022, 26024, 26114, 26121, 26133, 26144, 26150, 26152, 26153, 26176,
2783
0
        26181, 26184, 26186, 26193, 26196, 26197, 26198, 26200, 26202, 26208, 26213, 26216, 26240, 26242, 26245, 26250,
2784
0
        26260, 26262, 26264, 26265, 26272, 26276, 26278, 26282, 26646, 26649, 26661, 26689, 26706, 26709, 26714, 26721,
2785
0
        26729, 26757, 26769, 26776, 26790, 26881, 26884, 26896, 26901, 26913, 26916, 26918, 26921, 26944, 26945, 26949,
2786
0
        26950, 26952, 26961, 26964, 26965, 26966, 26969, 26976, 26981, 26986, 27010, 27012, 27018, 27029, 27041, 27044,
2787
0
        27045, 27049, 27153, 27158, 27160, 27201, 27204, 27209, 27216, 27221, 27224, 27226, 27236, 27237, 27241, 27270,
2788
0
        27284, 27288, 27290, 27302, 32768, 32770, 32776, 32778, 32800, 32802, 32808, 32810, 32837, 32848, 32849, 32852,
2789
0
        32854, 32857, 32869, 32896, 32898, 32904, 32906, 32917, 32928, 32930, 32936, 32938, 33029, 33041, 33044, 33046,
2790
0
        33049, 33061, 33089, 33092, 33097, 33104, 33106, 33109, 33110, 33112, 33113, 33124, 33126, 33129, 33157, 33161,
2791
0
        33172, 33174, 33177, 33189, 33280, 33282, 33288, 33290, 33301, 33312, 33314, 33320, 33322, 33361, 33364, 33369,
2792
0
        33381, 33408, 33410, 33416, 33418, 33429, 33440, 33442, 33448, 33450, 33812, 33817, 33857, 33860, 33873, 33877,
2793
0
        33882, 33889, 33892, 33897, 33940, 33945, 34049, 34057, 34066, 34069, 34074, 34086, 34089, 34112, 34113, 34117,
2794
0
        34120, 34129, 34132, 34133, 34134, 34137, 34138, 34149, 34150, 34152, 34154, 34177, 34180, 34182, 34185, 34192,
2795
0
        34194, 34197, 34200, 34214, 34321, 34326, 34329, 34341, 34369, 34372, 34377, 34378, 34384, 34389, 34393, 34394,
2796
0
        34401, 34406, 34410, 34437, 34449, 34458, 34468, 34816, 34818, 34824, 34826, 34837, 34848, 34850, 34856, 34858,
2797
0
        34881, 34885, 34897, 34900, 34905, 34917, 34921, 34944, 34946, 34952, 34954, 34965, 34976, 34978, 34984, 34986,
2798
0
        35077, 35078, 35089, 35092, 35094, 35109, 35137, 35140, 35142, 35145, 35152, 35154, 35157, 35162, 35169, 35172,
2799
0
        35205, 35222, 35225, 35237, 35328, 35330, 35336, 35338, 35349, 35360, 35362, 35368, 35370, 35397, 35409, 35412,
2800
0
        35414, 35456, 35458, 35464, 35466, 35477, 35488, 35490, 35496, 35498, 36869, 36881, 36886, 36888, 36889, 36901,
2801
0
        36929, 36934, 36937, 36949, 36952, 36954, 36969, 36970, 36997, 37009, 37012, 37014, 37017, 37029, 37121, 37124,
2802
0
        37126, 37129, 37136, 37141, 37144, 37146, 37153, 37156, 37158, 37161, 37184, 37189, 37200, 37201, 37204, 37205,
2803
0
        37206, 37209, 37218, 37221, 37252, 37254, 37266, 37269, 37272, 37281, 37284, 37286, 37289, 37381, 37393, 37396,
2804
0
        37401, 37413, 37444, 37446, 37449, 37456, 37458, 37461, 37464, 37478, 37481, 37509, 37524, 37526, 37545, 37889,
2805
0
        37892, 37894, 37904, 37909, 37912, 37926, 37952, 37962, 37969, 37972, 37973, 37974, 37976, 37977, 37984, 37985,
2806
0
        37986, 37989, 38020, 38022, 38034, 38036, 38037, 38040, 38049, 38057, 38144, 38149, 38152, 38154, 38160, 38161,
2807
0
        38164, 38165, 38166, 38169, 38177, 38181, 38185, 38186, 38209, 38212, 38213, 38214, 38217, 38224, 38225, 38226,
2808
0
        38228, 38229, 38230, 38232, 38233, 38234, 38241, 38244, 38245, 38246, 38249, 38273, 38277, 38280, 38289, 38290,
2809
0
        38292, 38293, 38294, 38297, 38298, 38304, 38306, 38309, 38312, 38314, 38401, 38404, 38416, 38421, 38425, 38432,
2810
0
        38438, 38441, 38469, 38472, 38473, 38481, 38482, 38485, 38486, 38489, 38501, 38504, 38530, 38532, 38537, 38538,
2811
0
        38546, 38548, 38549, 38564, 38566, 38569, 38917, 38934, 38937, 38949, 38977, 38982, 38992, 38994, 38997, 38998,
2812
0
        39002, 39012, 39013, 39045, 39057, 39062, 39065, 39077, 39172, 39174, 39177, 39184, 39186, 39189, 39192, 39194,
2813
0
        39200, 39201, 39204, 39206, 39232, 39234, 39237, 39240, 39242, 39249, 39252, 39253, 39254, 39257, 39266, 39269,
2814
0
        39270, 39274, 39297, 39300, 39312, 39314, 39317, 39322, 39329, 39334, 39429, 39445, 39461, 39492, 39494, 39497,
2815
0
        39504, 39509, 39512, 39521, 39557, 39569, 39572, 39573, 39574, 40960, 40962, 40968, 40970, 40981, 40992, 40994,
2816
0
        41000, 41002, 41029, 41041, 41044, 41046, 41049, 41088, 41090, 41096, 41098, 41109, 41120, 41122, 41128, 41130,
2817
0
        41221, 41225, 41233, 41236, 41238, 41241, 41242, 41286, 41289, 41297, 41301, 41304, 41306, 41313, 41316, 41349,
2818
0
        41360, 41362, 41366, 41369, 41474, 41480, 41482, 41488, 41497, 41506, 41512, 41514, 41541, 41553, 41558, 41561,
2819
0
        41573, 41600, 41602, 41608, 41610, 41621, 41632, 41634, 41640, 41642, 42009, 42021, 42049, 42052, 42064, 42068,
2820
0
        42069, 42072, 42074, 42081, 42085, 42086, 42088, 42089, 42117, 42246, 42249, 42256, 42258, 42261, 42264, 42278,
2821
0
        42281, 42306, 42309, 42321, 42324, 42325, 42326, 42329, 42341, 42346, 42369, 42372, 42373, 42374, 42377, 42386,
2822
0
        42389, 42392, 42501, 42513, 42518, 42522, 42529, 42533, 42564, 42566, 42570, 42578, 42581, 42582, 42584, 42592,
2823
0
        42594, 42630, 42640, 42645, 42646, 42649, 42657, 42660, 42662, 43008, 43010, 43016, 43018, 43040, 43042, 43048,
2824
0
        43050, 43089, 43092, 43094, 43097, 43136, 43138, 43144, 43146, 43157, 43168, 43170, 43176, 43178, 43269, 43284,
2825
0
        43289, 43297, 43301, 43329, 43344, 43349, 43354, 43361, 43366, 43369, 43408, 43414, 43520, 43522, 43528, 43530,
2826
0
        43552, 43554, 43560, 43562, 43601, 43604, 43606, 43648, 43650, 43656, 43658, 43669, 43680, 43682, 43688, 43690,
2827
0
    };
2828
0
    static const uint16_t kgrid_2bit_1024[1024] = {
2829
0
            0,     2,     5,     8,    10,    17,    20,    22,    25,    32,    34,    37,    40,    65,    68,    70,
2830
0
           73,    80,    82,    85,    88,    97,   100,   102,   105,   128,   130,   133,   136,   145,   148,   160,
2831
0
          165,   170,   257,   260,   262,   265,   272,   274,   277,   280,   289,   292,   320,   322,   325,   328,
2832
0
          337,   340,   342,   345,   352,   357,   360,   385,   388,   400,   402,   405,   417,   420,   512,   514,
2833
0
          517,   520,   529,   532,   544,   554,   577,   580,   582,   585,   592,   597,   640,   645,   650,   660,
2834
0
          674,  1025,  1028,  1030,  1033,  1040,  1042,  1045,  1048,  1057,  1060,  1062,  1065,  1088,  1090,  1093,
2835
0
         1096,  1098,  1105,  1108,  1110,  1113,  1120,  1122,  1125,  1153,  1156,  1158,  1161,  1168,  1173,  1176,
2836
0
         1185,  1188,  1280,  1282,  1285,  1288,  1290,  1297,  1300,  1302,  1305,  1312,  1317,  1320,  1345,  1348,
2837
0
         1350,  1353,  1360,  1362,  1365,  1368,  1377,  1380,  1408,  1410,  1413,  1416,  1425,  1428,  1440,  1537,
2838
0
         1540,  1542,  1545,  1552,  1557,  1600,  1605,  1608,  1617,  1620,  1632,  1665,  1668,  1680,  2048,  2050,
2839
0
         2053,  2056,  2065,  2068,  2070,  2073,  2080,  2085,  2090,  2113,  2116,  2118,  2121,  2128,  2130,  2133,
2840
0
         2136,  2145,  2148,  2176,  2181,  2196,  2218,  2305,  2308,  2320,  2322,  2325,  2328,  2337,  2368,  2373,
2841
0
         2376,  2385,  2388,  2400,  2433,  2448,  2560,  2577,  2580,  2594,  2600,  2602,  2640,  2713,  4097,  4100,
2842
0
         4102,  4105,  4112,  4114,  4117,  4120,  4129,  4132,  4134,  4160,  4162,  4165,  4168,  4177,  4180,  4182,
2843
0
         4185,  4192,  4194,  4197,  4200,  4225,  4228,  4230,  4240,  4245,  4248,  4257,  4260,  4352,  4354,  4357,
2844
0
         4360,  4362,  4369,  4372,  4374,  4377,  4384,  4386,  4389,  4392,  4417,  4420,  4422,  4425,  4432,  4434,
2845
0
         4437,  4440,  4449,  4452,  4480,  4482,  4485,  4488,  4497,  4500,  4609,  4612,  4617,  4624,  4629,  4641,
2846
0
         4644,  4672,  4677,  4689,  4692,  4737,  4740,  4752,  5120,  5122,  5125,  5128,  5137,  5140,  5142,  5145,
2847
0
         5152,  5157,  5160,  5185,  5188,  5190,  5193,  5200,  5202,  5205,  5208,  5217,  5220,  5248,  5250,  5253,
2848
0
         5256,  5265,  5268,  5280,  5377,  5380,  5382,  5385,  5392,  5394,  5397,  5400,  5409,  5412,  5440,  5442,
2849
0
         5445,  5448,  5457,  5460,  5472,  5505,  5508,  5520,  5632,  5637,  5640,  5649,  5652,  5664,  5697,  5700,
2850
0
         5712,  5760,  5802,  6145,  6148,  6150,  6153,  6160,  6165,  6168,  6177,  6208,  6210,  6213,  6216,  6225,
2851
0
         6228,  6240,  6273,  6276,  6400,  6402,  6405,  6408,  6417,  6420,  6432,  6465,  6468,  6480,  6505,  6562,
2852
0
         6660,  6672,  6720,  6742,  8192,  8194,  8197,  8200,  8209,  8212,  8214,  8217,  8224,  8229,  8234,  8257,
2853
0
         8260,  8272,  8274,  8277,  8292,  8320,  8330,  8340,  8362,  8449,  8452,  8464,  8466,  8469,  8481,  8512,
2854
0
         8514,  8517,  8529,  8532,  8544,  8577,  8580,  8592,  8704,  8714,  8738,  8744,  8746,  8772,  8784,  8840,
2855
0
         8842,  8872,  9217,  9220,  9222,  9225,  9232,  9237,  9240,  9249,  9252,  9280,  9282,  9285,  9288,  9297,
2856
0
         9300,  9312,  9345,  9348,  9360,  9472,  9477,  9480,  9489,  9492,  9504,  9537,  9540,  9552,  9574,  9600,
2857
0
         9729,  9732,  9744,  9792,  9817, 10240, 10245, 10257, 10260, 10305, 10308, 10320, 10378, 10410, 10497, 10500,
2858
0
        10512, 10645, 10762, 10786, 10852, 10888, 10890, 16385, 16388, 16390, 16393, 16400, 16402, 16405, 16408, 16410,
2859
0
        16417, 16420, 16422, 16448, 16450, 16453, 16456, 16458, 16465, 16468, 16470, 16473, 16480, 16482, 16485, 16513,
2860
0
        16516, 16528, 16533, 16536, 16545, 16548, 16640, 16642, 16645, 16648, 16657, 16660, 16662, 16665, 16672, 16674,
2861
0
        16677, 16705, 16708, 16710, 16713, 16720, 16722, 16725, 16728, 16737, 16740, 16768, 16770, 16773, 16776, 16785,
2862
0
        16788, 16800, 16897, 16900, 16912, 16914, 16917, 16920, 16932, 16960, 16965, 16968, 16977, 16980, 16992, 17025,
2863
0
        17028, 17408, 17410, 17413, 17416, 17418, 17425, 17428, 17430, 17433, 17440, 17442, 17445, 17448, 17473, 17476,
2864
0
        17478, 17481, 17488, 17490, 17493, 17496, 17505, 17508, 17536, 17538, 17541, 17544, 17553, 17556, 17568, 17665,
2865
0
        17668, 17670, 17673, 17680, 17682, 17685, 17688, 17697, 17700, 17728, 17730, 17733, 17736, 17745, 17748, 17760,
2866
0
        17770, 17793, 17796, 17808, 17920, 17922, 17925, 17928, 17937, 17940, 17952, 17985, 17988, 18000, 18048, 18085,
2867
0
        18433, 18436, 18441, 18448, 18450, 18453, 18456, 18465, 18468, 18496, 18498, 18501, 18504, 18513, 18516, 18528,
2868
0
        18564, 18576, 18688, 18690, 18693, 18696, 18705, 18708, 18720, 18753, 18756, 18768, 18816, 18838, 18945, 18948,
2869
0
        18960, 19008, 20480, 20482, 20485, 20488, 20497, 20500, 20502, 20505, 20512, 20514, 20517, 20520, 20545, 20548,
2870
0
        20550, 20553, 20560, 20562, 20565, 20568, 20577, 20580, 20608, 20610, 20613, 20616, 20625, 20628, 20737, 20740,
2871
0
        20742, 20745, 20752, 20754, 20757, 20760, 20769, 20772, 20800, 20802, 20805, 20808, 20817, 20820, 20832, 20865,
2872
0
        20868, 20880, 20992, 20997, 21000, 21009, 21012, 21024, 21057, 21060, 21072, 21097, 21120, 21505, 21508, 21510,
2873
0
        21513, 21520, 21522, 21525, 21528, 21537, 21540, 21568, 21570, 21573, 21576, 21585, 21588, 21600, 21633, 21636,
2874
0
        21648, 21760, 21762, 21765, 21768, 21777, 21780, 21792, 21825, 21828, 21840, 21888, 22017, 22020, 22032, 22054,
2875
0
        22080, 22528, 22530, 22533, 22536, 22545, 22548, 22560, 22593, 22596, 22608, 22618, 22656, 22785, 22788, 22800,
2876
0
        22848, 23040, 23065, 23173, 23208, 24577, 24580, 24582, 24592, 24594, 24597, 24600, 24609, 24612, 24640, 24645,
2877
0
        24648, 24657, 24660, 24672, 24708, 24720, 24832, 24834, 24837, 24840, 24849, 24852, 24864, 24897, 24900, 24912,
2878
0
        24960, 24985, 25092, 25104, 25152, 25174, 25249, 25600, 25605, 25608, 25617, 25620, 25632, 25665, 25668, 25680,
2879
0
        25728, 25857, 25860, 25872, 25920, 25930, 25960, 26002, 26112, 26260, 26625, 26628, 26640, 26725, 26776, 26880,
2880
0
        26922, 27202, 27297, 32768, 32770, 32773, 32776, 32785, 32788, 32793, 32800, 32805, 32833, 32836, 32848, 32850,
2881
0
        32853, 32856, 32865, 32896, 32901, 32913, 32916, 33025, 33028, 33033, 33040, 33042, 33045, 33048, 33057, 33060,
2882
0
        33088, 33090, 33093, 33096, 33105, 33108, 33153, 33156, 33168, 33193, 33280, 33285, 33290, 33297, 33300, 33345,
2883
0
        33348, 33360, 33793, 33796, 33798, 33801, 33808, 33810, 33813, 33816, 33825, 33856, 33858, 33861, 33864, 33873,
2884
0
        33876, 33888, 33921, 33924, 33936, 34048, 34050, 34053, 34056, 34065, 34068, 34080, 34113, 34116, 34128, 34176,
2885
0
        34186, 34305, 34308, 34320, 34345, 34368, 34816, 34821, 34833, 34836, 34881, 34884, 34896, 34978, 35073, 35076,
2886
0
        35136, 35173, 35362, 35416, 35418, 35458, 35490, 36865, 36868, 36873, 36880, 36882, 36885, 36888, 36900, 36928,
2887
0
        36930, 36933, 36936, 36945, 36948, 36960, 36993, 36996, 37008, 37120, 37125, 37137, 37140, 37185, 37188, 37200,
2888
0
        37210, 37377, 37380, 37392, 37440, 37542, 37888, 37890, 37893, 37896, 37905, 37908, 37920, 37953, 37956, 37968,
2889
0
        38016, 38038, 38145, 38148, 38160, 38208, 38296, 38305, 38400, 38470, 38500, 38913, 38916, 38928, 38950, 38976,
2890
0
        39081, 39168, 39241, 39250, 39568, 40960, 40965, 40970, 40980, 40994, 41002, 41025, 41028, 41040, 41122, 41130,
2891
0
        41280, 41317, 41474, 41482, 41506, 41512, 41514, 41602, 41608, 41610, 41640, 41985, 41988, 42000, 42048, 42121,
2892
0
        42148, 42240, 42265, 42577, 43018, 43048, 43170, 43348, 43398, 43528, 43530, 43552, 43554, 43560, 43656, 43690,
2893
0
    };
2894
2895
0
    const int kmap_size = 43692;
2896
    //const int nwant = type == GGML_TYPE_IQ1_S ? 3 : 2;
2897
0
    const int nwant = type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? 3 : type == GGML_TYPE_IQ2_S ? 1 : 2;
2898
0
    const uint16_t * kgrid = type == GGML_TYPE_IQ2_XXS ? kgrid_2bit_256 :
2899
0
                             type == GGML_TYPE_IQ2_XS  ? kgrid_2bit_512 :
2900
0
                             type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M ? kgrid_1bit_2048 : kgrid_2bit_1024;
2901
0
    uint64_t * kgrid_q2xs;
2902
0
    int      * kmap_q2xs;
2903
0
    uint16_t * kneighbors_q2xs;
2904
2905
    //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
2906
0
    uint64_t * the_grid = (uint64_t *)malloc(grid_size*sizeof(uint64_t));
2907
0
    for (int k = 0; k < grid_size; ++k) {
2908
0
        int8_t * pos = (int8_t *)(the_grid + k);
2909
0
        for (int i = 0; i < 8; ++i) {
2910
0
            int l = (kgrid[k] >> 2*i) & 0x3;
2911
0
            pos[i] = 2*l + 1;
2912
0
        }
2913
0
    }
2914
0
    kgrid_q2xs = the_grid;
2915
0
    iq2_data[gindex].grid = the_grid;
2916
0
    kmap_q2xs = (int *)malloc(kmap_size*sizeof(int));
2917
0
    iq2_data[gindex].map = kmap_q2xs;
2918
0
    for (int i = 0; i < kmap_size; ++i) kmap_q2xs[i] = -1;
2919
0
    uint64_t aux64;
2920
0
    uint8_t * aux8 = (uint8_t *)&aux64;
2921
0
    for (int i = 0; i < grid_size; ++i) {
2922
0
        aux64 = kgrid_q2xs[i];
2923
0
        uint16_t index = 0;
2924
0
        for (int k=0; k<8; ++k) {
2925
0
            uint16_t q = (aux8[k] - 1)/2;
2926
0
            index |= (q << 2*k);
2927
0
        }
2928
0
        kmap_q2xs[index] = i;
2929
0
    }
2930
0
    int8_t pos[8];
2931
0
    int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
2932
0
    int num_neighbors = 0, num_not_in_map = 0;
2933
0
    for (int i = 0; i < kmap_size; ++i) {
2934
0
        if (kmap_q2xs[i] >= 0) continue;
2935
0
        ++num_not_in_map;
2936
0
        for (int k = 0; k < 8; ++k) {
2937
0
            int l = (i >> 2*k) & 0x3;
2938
0
            pos[k] = 2*l + 1;
2939
0
        }
2940
0
        for (int j = 0; j < grid_size; ++j) {
2941
0
            const int8_t * pg = (const int8_t *)(kgrid_q2xs + j);
2942
0
            int d2 = 0;
2943
0
            for (int k = 0; k < 8; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
2944
0
            dist2[2*j+0] = d2;
2945
0
            dist2[2*j+1] = j;
2946
0
        }
2947
0
        qsort(dist2, grid_size, 2*sizeof(int), iq2_compare_func);
2948
0
        int n = 0; int d2 = dist2[0];
2949
0
        int nhave = 1;
2950
0
        for (int j = 0; j < grid_size; ++j) {
2951
0
            if (dist2[2*j] > d2) {
2952
0
                if (nhave == nwant) break;
2953
0
                d2 = dist2[2*j];
2954
0
                ++nhave;
2955
0
            }
2956
0
            ++n;
2957
0
        }
2958
0
        num_neighbors += n;
2959
0
    }
2960
    //printf("%s: %d neighbours in total\n", __func__, num_neighbors);
2961
0
    kneighbors_q2xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
2962
0
    iq2_data[gindex].neighbours = kneighbors_q2xs;
2963
0
    int counter = 0;
2964
0
    for (int i = 0; i < kmap_size; ++i) {
2965
0
        if (kmap_q2xs[i] >= 0) continue;
2966
0
        for (int k = 0; k < 8; ++k) {
2967
0
            int l = (i >> 2*k) & 0x3;
2968
0
            pos[k] = 2*l + 1;
2969
0
        }
2970
0
        for (int j = 0; j < grid_size; ++j) {
2971
0
            const int8_t * pg = (const int8_t *)(kgrid_q2xs + j);
2972
0
            int d2 = 0;
2973
0
            for (int k = 0; k < 8; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
2974
0
            dist2[2*j+0] = d2;
2975
0
            dist2[2*j+1] = j;
2976
0
        }
2977
0
        qsort(dist2, grid_size, 2*sizeof(int), iq2_compare_func);
2978
0
        kmap_q2xs[i] = -(counter + 1);
2979
0
        int d2 = dist2[0];
2980
0
        uint16_t * start = &kneighbors_q2xs[counter++];
2981
0
        int n = 0, nhave = 1;
2982
0
        for (int j = 0; j < grid_size; ++j) {
2983
0
            if (dist2[2*j] > d2) {
2984
0
                if (nhave == nwant) break;
2985
0
                d2 = dist2[2*j];
2986
0
                ++nhave;
2987
0
            }
2988
0
            kneighbors_q2xs[counter++] = dist2[2*j+1];
2989
0
            ++n;
2990
0
        }
2991
0
        *start = n;
2992
0
    }
2993
0
    free(dist2);
2994
0
}
2995
2996
2.61k
void iq2xs_free_impl(enum ggml_type type) {
2997
2.61k
    GGML_ASSERT(type == GGML_TYPE_IQ2_XXS || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ1_S || type == GGML_TYPE_IQ1_M || type == GGML_TYPE_IQ2_S);
2998
2.61k
    const int gindex = iq2_data_index(type);
2999
2.61k
    if (iq2_data[gindex].grid) {
3000
0
        free(iq2_data[gindex].grid);       iq2_data[gindex].grid = NULL;
3001
0
        free(iq2_data[gindex].map);        iq2_data[gindex].map  = NULL;
3002
0
        free(iq2_data[gindex].neighbours); iq2_data[gindex].neighbours = NULL;
3003
0
    }
3004
2.61k
}
3005
3006
// Among the candidate grid points listed in `neighbours`, pick the one with the smallest
// weighted squared distance to `xval`.
//
//   neighbours  neighbours[0] is the number of candidates; it is followed by that many indices into `grid`
//   grid        every 64-bit entry is read as 8 int8_t coordinates
//   xval        the 8 values being approximated
//   weight      per-value weights applied to the squared error
//   scale       factor applied to a grid coordinate before it is compared with xval
//   L           output: for the winning point, (coordinate - 1)/2 for each of its 8 coordinates
//
// Returns the index of the winning point in `grid`. Aborts (GGML_ASSERT) if the candidate list is
// empty or if no candidate produced an error below FLT_MAX.
static int iq2_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
    const int n_candidates = neighbours[0];
    GGML_ASSERT(n_candidates > 0);

    const uint16_t * candidates = neighbours + 1;
    int   winner  = -1;
    float min_err = FLT_MAX;
    for (int c = 0; c < n_candidates; ++c) {
        const int8_t * point = (const int8_t *)(grid + candidates[c]);
        float err = 0;
        for (int i = 0; i < 8; ++i) {
            const float q    = point[i];
            const float diff = scale*q - xval[i];
            err += weight[i]*diff*diff;
        }
        // strict '<' keeps the first candidate on ties
        if (err < min_err) {
            min_err = err;
            winner  = candidates[c];
        }
    }
    GGML_ASSERT(winner >= 0);

    const int8_t * chosen = (const int8_t *)(grid + winner);
    for (int i = 0; i < 8; ++i) {
        L[i] = (chosen[i] - 1)/2;
    }
    return winner;
}
3029
3030
0
// Quantize `n` floats from `x` into IQ2_XXS blocks written to `vy`.
//
//   x              input values; n must be a multiple of QK_K (asserted)
//   vy             output buffer, interpreted as an array of n/QK_K block_iq2_xxs
//   quant_weights  per-element importance weights, indexed like x; must not be NULL (asserted)
//
// The grid / map / neighbour tables for GGML_TYPE_IQ2_XXS must already be present in iq2_data[]
// (the assertions below abort otherwise).
//
// Per QK_K super-block, for every sub-block of 32 values:
//   1. signs are split off in groups of 8; only 7 sign bits per group are stored, so when a group has
//      an odd number of negatives the element with the smallest weight*x^2 gets its sign flipped;
//   2. several candidate scales are tried; each group of 8 magnitudes is rounded to levels 0..kMaxQ-1
//      and, if the rounded point is not in the map, replaced by its best listed neighbour;
//   3. the candidate with the best weighted fit is kept, re-snapped to the grid and its scale refitted;
//   4. grid indices (8 bits per group) go to q2[2*ib+0], sign fields (7 bits per group) to q2[2*ib+1].
// Finally the sub-block scales are quantized to 4 bits relative to d = max_scale/31 and stored in the
// top 4 bits of q2[2*ib+1].
static void quantize_row_iq2_xxs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {

    const int gindex = iq2_data_index(GGML_TYPE_IQ2_XXS);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    GGML_ASSERT(quant_weights   && "missing quantization weights");
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 3;

    const int64_t nbl = n/QK_K;

    block_iq2_xxs * y = vy;

    float scales[QK_K/32];
    float weight[32];
    float xval[32];
    int8_t L[32];
    int8_t Laux[32];
    float  waux[32];
    uint8_t block_signs[4];
    uint32_t q2[2*(QK_K/32)];

    // 64-bit counter: nbl is int64_t, and QK_K*ibl must not be evaluated in 32-bit int.
    for (int64_t ibl = 0; ibl < nbl; ++ibl) {

        y[ibl].d = GGML_FP32_TO_FP16(0.f);
        memset(q2, 0, QK_K/4);

        float max_scale = 0;

        // mean square of the super-block, used to regularise the per-element weights
        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = sumx2/QK_K;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const float * xb = xbl + 32*ib;
            const float * qw = quant_weights + QK_K*ibl + 32*ib;
            for (int i = 0; i < 32; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            for (int i = 0; i < 32; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 4; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    // odd number of negatives: flip the least important element so the count becomes even
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                // only the low 7 sign bits are kept
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS) {
                // (near-)zero sub-block: q2 entries stay zero from the memset above
                scales[ib] = 0;
                memset(L, 0, 32);
                continue;
            }
            float scale = make_qp_quants(32, kMaxQ+1, xval, (uint8_t*)L, weight);
            float eff_max = scale*kMaxQ;
            float best = 0;
            // scan candidate inverse scales around the initial estimate
            for (int is = -6; is <= 6; ++is) {
                float id = (2*kMaxQ-1+is*0.1f)/eff_max;
                float this_scale = 1/id;
                for (int k = 0; k < 4; ++k) {
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    // pack the 8 levels, 2 bits each, into the map key
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        // negative map entry: -(offset+1) into the neighbour lists
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    memcpy(L, Laux, 32);
                }
            }
            if (scale > 0) {
                // re-quantize with the chosen scale, snap to the grid, then refit the scale
                float id = 1/scale;
                for (int k = 0; k < 4; ++k) {
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 2*i);
                    }
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
                    }
                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + grid_index);
                    for (int i = 0; i < 8; ++i) L[8*k+i] = (pg[i] - 1)/2;
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // This should never happen, but just in case, flip scale so that it is positive (we use uint's to encode the scale)
                // and correspondingly flip quant signs.
                scale = -scale;
                for (int k = 0; k < 4; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 4; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    // cast: a uint16_t argument is promoted to int, while %u expects unsigned
                    printf("Oops: found point %u not on grid:", (unsigned) u);
                    for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                q2[2*ib+0] |= ((uint32_t) grid_index << 8*k);
                q2[2*ib+1] |= (block_signs[k] << 7*k);
            }
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            // every sub-block was skipped or got a zero scale: emit an all-zero block
            memset(y[ibl].qs, 0, QK_K/4);
            continue;
        }

        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/32; ++ib) {
            // 4-bit sub-block scale, stored in the top bits of the sign word
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            q2[2*ib+1] |= ((uint32_t)l << 28);
        }
        memcpy(y[ibl].qs, q2, QK_K/4);
    }
}
3202
3203
0
// Quantize `n` floats from `x` into IQ2_XS blocks written to `vy`.
//
//   x              input values; n must be a multiple of QK_K (asserted)
//   vy             output buffer, interpreted as an array of n/QK_K block_iq2_xs
//   quant_weights  per-element importance weights, indexed like x; must not be NULL (asserted)
//
// The grid / map / neighbour tables for GGML_TYPE_IQ2_XS must already be present in iq2_data[]
// (the assertions below abort otherwise).
//
// Per QK_K super-block, for every sub-block of 16 values (two groups of 8):
//   1. signs are split off per group; only 7 sign bits are stored, so when a group has an odd number
//      of negatives the element with the smallest weight*x^2 gets its sign flipped;
//   2. candidate scales are scanned; each group is rounded to levels 0..kMaxQ-1 and, if the rounded
//      point is not in the map, replaced by its best listed neighbour;
//   3. groups of the winning candidate that were off-grid are re-quantized with the fitted scale;
//   4. each group is stored as one uint16_t: grid index | (sign bits << 9).
// Finally the sub-block scales are quantized to 4 bits relative to d = max_scale/31 and packed two
// per byte into y[ibl].scales.
static void quantize_row_iq2_xs_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {

    const int gindex = iq2_data_index(GGML_TYPE_IQ2_XS);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    GGML_ASSERT(quant_weights   && "missing quantization weights");
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 3;

    const int64_t nbl = n/QK_K;

    block_iq2_xs * y = vy;

    float scales[QK_K/16];
    float weight[16];
    float xval[16];
    // Zero-initialised: if no candidate scale is ever accepted for the first sub-block
    // (e.g. all weights are zero, so sumq2 == 0), L is read below without having been written.
    int8_t L[16] = {0};
    int8_t Laux[16];
    float  waux[16];
    bool   is_on_grid[2];
    bool   is_on_grid_aux[2];
    uint8_t block_signs[2];
    uint16_t q2[2*(QK_K/16)];

    // 64-bit counter: nbl is int64_t, and QK_K*ibl must not be evaluated in 32-bit int.
    for (int64_t ibl = 0; ibl < nbl; ++ibl) {

        y[ibl].d = GGML_FP32_TO_FP16(0.f);
        memset(q2, 0, QK_K/4);
        memset(y[ibl].scales, 0, QK_K/32);

        float max_scale = 0;

        // mean square of the super-block, used to regularise the per-element weights
        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = sumx2/QK_K;

        for (int ib = 0; ib < QK_K/16; ++ib) {
            const float * xb = xbl + 16*ib;
            const float * qw = quant_weights + QK_K*ibl + 16*ib;
            for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            for (int i = 0; i < 16; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 2; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    // odd number of negatives: flip the least important element so the count becomes even
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                // only the low 7 sign bits are kept
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS) {
                // (near-)zero sub-block: q2 entries stay zero from the memset above
                scales[ib] = 0;
                memset(L, 0, 16);
                continue;
            }
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            is_on_grid[0] = is_on_grid[1] = true;
            // scan candidate inverse scales around the max-based estimate
            for (int is = -9; is <= 9; ++is) {
                float id = (2*kMaxQ-1+is*0.1f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < 2; ++k) {
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    // pack the 8 levels, 2 bits each, into the map key
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
                    int grid_index = kmap_q2xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        is_on_grid_aux[k] = false;
                        // negative map entry: -(offset+1) into the neighbour lists
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < 16; ++i) L[i] = Laux[i];
                    for (int k = 0; k <  2; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                // redo only the groups that needed a neighbour, now with the fitted scale
                float id = 1/scale;
                for (int k = 0; k < 2; ++k) {
                    if (is_on_grid[k]) continue;
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 2*i);
                        L[8*k + i] = l;
                    }
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // keep the stored scale non-negative by flipping the sign bits instead
                scale = -scale;
                for (int k = 0; k < 2; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 2; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    // cast: a uint16_t argument is promoted to int, while %u expects unsigned
                    printf("Oops: found point %u not on grid:", (unsigned) u);
                    for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                q2[2*ib+k] = grid_index | (block_signs[k] << 9);
            }
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            // every sub-block was skipped or got a zero scale: emit an all-zero block
            memset(y[ibl].qs, 0, QK_K/4);
            continue;
        }

        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/16; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            // two 4-bit scales per byte: even sub-block in the low nibble, odd in the high nibble
            if (ib%2 == 0) y[ibl].scales[ib/2] = l;
            else y[ibl].scales[ib/2] |= (l << 4);
        }
        memcpy(y[ibl].qs, q2, QK_K/4);

    }
}
3382
3383
0
// Quantize `nrow` rows of `n_per_row` floats each from `src` into IQ2_XXS blocks at `dst`.
// n_per_row must be a multiple of QK_K (asserted). The same `quant_weights` pointer is handed
// to every row. Returns the number of bytes written to `dst`.
size_t quantize_iq2_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    const int64_t blocks_per_row = n_per_row/QK_K;
    const size_t  row_bytes      = blocks_per_row*sizeof(block_iq2_xxs);
    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r, src += n_per_row, out += row_bytes) {
        quantize_row_iq2_xxs_impl(src, out, n_per_row, quant_weights);
    }
    return nrow * blocks_per_row * sizeof(block_iq2_xxs);
}
3394
3395
0
// Quantize `nrow` rows of `n_per_row` floats each from `src` into IQ2_XS blocks at `dst`.
// n_per_row must be a multiple of QK_K (asserted). The same `quant_weights` pointer is handed
// to every row. Returns the number of bytes written to `dst`.
size_t quantize_iq2_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    const int64_t blocks_per_row = n_per_row/QK_K;
    const size_t  row_bytes      = blocks_per_row*sizeof(block_iq2_xs);
    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r, src += n_per_row, out += row_bytes) {
        quantize_row_iq2_xs_impl(src, out, n_per_row, quant_weights);
    }
    return nrow * blocks_per_row * sizeof(block_iq2_xs);
}
3406
3407
//
3408
// ============================================= 3-bit using D4 lattice
3409
//
3410
3411
// One set of lookup tables for an IQ3 grid size; instances live in iq3_data[].
// iq3xs_init_impl() treats a non-NULL `grid` as "already initialised".
// NOTE(review): the field roles below are inferred from the names and from how the equally
// named iq2 tables are used earlier in this file — confirm against iq3xs_init_impl().
typedef struct {
3412
    uint32_t * grid;        // presumably the packed grid points
3413
    int      * map;         // presumably packed-levels key -> grid index, or a negative neighbour-list reference
3414
    uint16_t * neighbours;  // presumably the concatenated neighbour lists
3415
} iq3_entry_t;
3416
3417
static iq3_entry_t iq3_data[2] = {
3418
    {NULL, NULL, NULL},
3419
    {NULL, NULL, NULL},
3420
};
3421
3422
872
// Map a grid size to its slot in iq3_data[]: 256 -> 0, 512 -> 1.
// Any other size trips the assertion.
static inline int iq3_data_index(int grid_size) {
    (void)grid_size;
    GGML_ASSERT(grid_size == 256 || grid_size == 512);
    if (grid_size == 256) {
        return 0;
    }
    return 1;
}
3427
3428
0
// qsort comparator for records made of two ints: ascending by the first int,
// ties broken by ascending second int. Returns -1, 0 or 1.
static int iq3_compare_func(const void * left, const void * right) {
    const int * a = (const int *)left;
    const int * b = (const int *)right;
    for (int field = 0; field < 2; ++field) {
        if (a[field] < b[field]) return -1;
        if (a[field] > b[field]) return  1;
    }
    return 0;
}
3433
3434
0
void iq3xs_init_impl(int grid_size) {
3435
0
    const int gindex = iq3_data_index(grid_size);
3436
0
    if (iq3_data[gindex].grid) {
3437
0
        return;
3438
0
    }
3439
0
    static const uint16_t kgrid_256[256] = {
3440
0
            0,     2,     4,     9,    11,    15,    16,    18,    25,    34,    59,    61,    65,    67,    72,    74,
3441
0
           81,    85,    88,    90,    97,   108,   120,   128,   130,   132,   137,   144,   146,   153,   155,   159,
3442
0
          169,   175,   189,   193,   199,   200,   202,   213,   248,   267,   287,   292,   303,   315,   317,   321,
3443
0
          327,   346,   362,   413,   436,   456,   460,   462,   483,   497,   513,   515,   520,   522,   529,   531,
3444
0
          536,   538,   540,   551,   552,   576,   578,   585,   592,   594,   641,   643,   648,   650,   657,   664,
3445
0
          698,   704,   706,   720,   729,   742,   758,   769,   773,   808,   848,   852,   870,   889,   901,   978,
3446
0
          992,  1024,  1026,  1033,  1035,  1040,  1042,  1046,  1049,  1058,  1089,  1091,  1093,  1096,  1098,  1105,
3447
0
         1112,  1139,  1143,  1144,  1152,  1154,  1161,  1167,  1168,  1170,  1183,  1184,  1197,  1217,  1224,  1228,
3448
0
         1272,  1276,  1309,  1323,  1347,  1367,  1377,  1404,  1473,  1475,  1486,  1509,  1537,  1544,  1546,  1553,
3449
0
         1555,  1576,  1589,  1594,  1600,  1602,  1616,  1625,  1636,  1638,  1665,  1667,  1672,  1685,  1706,  1722,
3450
0
         1737,  1755,  1816,  1831,  1850,  1856,  1862,  1874,  1901,  1932,  1950,  1971,  2011,  2032,  2052,  2063,
3451
0
         2077,  2079,  2091,  2095,  2172,  2192,  2207,  2208,  2224,  2230,  2247,  2277,  2308,  2345,  2356,  2389,
3452
0
         2403,  2424,  2501,  2504,  2506,  2520,  2570,  2593,  2616,  2624,  2630,  2646,  2669,  2700,  2714,  2746,
3453
0
         2754,  2795,  2824,  2835,  2839,  2874,  2882,  2905,  2984,  3028,  3042,  3092,  3108,  3110,  3124,  3153,
3454
0
         3185,  3215,  3252,  3288,  3294,  3364,  3397,  3434,  3483,  3523,  3537,  3587,  3589,  3591,  3592,  3610,
3455
0
         3626,  3670,  3680,  3722,  3749,  3754,  3776,  3789,  3803,  3824,  3857,  3873,  3904,  3906,  3924,  3992,
3456
0
    };
3457
0
    static const uint16_t kgrid_512[512] = {
3458
0
            0,     1,     2,     5,     7,     8,     9,    10,    12,    14,    16,    17,    21,    27,    32,    34,
3459
0
           37,    39,    41,    43,    48,    50,    57,    60,    63,    64,    65,    66,    68,    72,    73,    77,
3460
0
           80,    83,    87,    89,    93,   100,   113,   117,   122,   128,   129,   133,   135,   136,   139,   142,
3461
0
          145,   149,   152,   156,   162,   165,   167,   169,   171,   184,   187,   195,   201,   205,   208,   210,
3462
0
          217,   219,   222,   228,   232,   234,   247,   249,   253,   256,   267,   271,   273,   276,   282,   288,
3463
0
          291,   297,   312,   322,   324,   336,   338,   342,   347,   353,   357,   359,   374,   379,   390,   393,
3464
0
          395,   409,   426,   441,   448,   450,   452,   464,   466,   470,   475,   488,   492,   512,   513,   514,
3465
0
          516,   520,   521,   523,   525,   527,   528,   530,   537,   540,   542,   556,   558,   561,   570,   576,
3466
0
          577,   579,   582,   584,   588,   593,   600,   603,   609,   616,   618,   632,   638,   640,   650,   653,
3467
0
          655,   656,   660,   666,   672,   675,   685,   688,   698,   705,   708,   711,   712,   715,   721,   727,
3468
0
          728,   732,   737,   754,   760,   771,   773,   778,   780,   793,   795,   802,   806,   808,   812,   833,
3469
0
          840,   843,   849,   856,   858,   873,   912,   916,   919,   932,   934,   961,   963,   968,   970,   977,
3470
0
          989,   993,  1010,  1016,  1024,  1025,  1027,  1029,  1031,  1032,  1034,  1036,  1038,  1041,  1043,  1047,
3471
0
         1048,  1050,  1057,  1059,  1061,  1064,  1066,  1079,  1080,  1083,  1085,  1088,  1090,  1096,  1099,  1103,
3472
0
         1106,  1109,  1113,  1116,  1122,  1129,  1153,  1156,  1159,  1169,  1171,  1176,  1183,  1185,  1195,  1199,
3473
0
         1209,  1212,  1216,  1218,  1221,  1225,  1234,  1236,  1241,  1243,  1250,  1256,  1270,  1281,  1287,  1296,
3474
0
         1299,  1306,  1309,  1313,  1338,  1341,  1348,  1353,  1362,  1375,  1376,  1387,  1400,  1408,  1410,  1415,
3475
0
         1425,  1453,  1457,  1477,  1481,  1494,  1496,  1507,  1512,  1538,  1545,  1547,  1549,  1551,  1554,  1561,
3476
0
         1563,  1565,  1570,  1572,  1575,  1577,  1587,  1593,  1601,  1603,  1605,  1612,  1617,  1619,  1632,  1648,
3477
0
         1658,  1662,  1664,  1674,  1680,  1690,  1692,  1704,  1729,  1736,  1740,  1745,  1747,  1751,  1752,  1761,
3478
0
         1763,  1767,  1773,  1787,  1795,  1801,  1806,  1810,  1817,  1834,  1840,  1844,  1857,  1864,  1866,  1877,
3479
0
         1882,  1892,  1902,  1915,  1934,  1953,  1985,  1987,  2000,  2002,  2013,  2048,  2052,  2058,  2064,  2068,
3480
0
         2071,  2074,  2081,  2088,  2104,  2114,  2119,  2121,  2123,  2130,  2136,  2141,  2147,  2153,  2157,  2177,
3481
0
         2179,  2184,  2189,  2193,  2203,  2208,  2223,  2226,  2232,  2244,  2249,  2251,  2256,  2258,  2265,  2269,
3482
0
         2304,  2306,  2324,  2335,  2336,  2361,  2373,  2375,  2385,  2418,  2443,  2460,  2480,  2504,  2509,  2520,
3483
0
         2531,  2537,  2562,  2568,  2572,  2578,  2592,  2596,  2599,  2602,  2614,  2620,  2625,  2627,  2629,  2634,
3484
0
         2641,  2650,  2682,  2688,  2697,  2707,  2712,  2718,  2731,  2754,  2759,  2760,  2775,  2788,  2793,  2805,
3485
0
         2811,  2817,  2820,  2832,  2842,  2854,  2890,  2902,  2921,  2923,  2978,  3010,  3012,  3026,  3081,  3083,
3486
0
         3085,  3097,  3099,  3120,  3136,  3152,  3159,  3188,  3210,  3228,  3234,  3245,  3250,  3256,  3264,  3276,
3487
0
         3281,  3296,  3349,  3363,  3378,  3392,  3395,  3420,  3440,  3461,  3488,  3529,  3531,  3584,  3588,  3591,
3488
0
         3600,  3602,  3614,  3616,  3628,  3634,  3650,  3657,  3668,  3683,  3685,  3713,  3716,  3720,  3726,  3729,
3489
0
         3736,  3753,  3778,  3802,  3805,  3819,  3841,  3845,  3851,  3856,  3880,  3922,  3938,  3970,  3993,  4032,
3490
0
    };
3491
3492
0
    const int kmap_size = 4096;
3493
0
    const int nwant = grid_size == 256 ? 2 : 3;
3494
0
    const uint16_t * kgrid = grid_size == 256 ? kgrid_256 : kgrid_512;
3495
0
    uint32_t * kgrid_q3xs;
3496
0
    int      * kmap_q3xs;
3497
0
    uint16_t * kneighbors_q3xs;
3498
3499
    //printf("================================================================= %s(grid_size = %d)\n", __func__, grid_size);
3500
0
    uint32_t * the_grid = (uint32_t *)malloc(grid_size*sizeof(uint32_t));
3501
0
    for (int k = 0; k < grid_size; ++k) {
3502
0
        int8_t * pos = (int8_t *)(the_grid + k);
3503
0
        for (int i = 0; i < 4; ++i) {
3504
0
            int l = (kgrid[k] >> 3*i) & 0x7;
3505
0
            pos[i] = 2*l + 1;
3506
0
        }
3507
0
    }
3508
0
    kgrid_q3xs = the_grid;
3509
0
    iq3_data[gindex].grid = the_grid;
3510
0
    kmap_q3xs = (int *)malloc(kmap_size*sizeof(int));
3511
0
    iq3_data[gindex].map = kmap_q3xs;
3512
0
    for (int i = 0; i < kmap_size; ++i) kmap_q3xs[i] = -1;
3513
0
    uint32_t aux32;
3514
0
    uint8_t * aux8 = (uint8_t *)&aux32;
3515
0
    for (int i = 0; i < grid_size; ++i) {
3516
0
        aux32 = kgrid_q3xs[i];
3517
0
        uint16_t index = 0;
3518
0
        for (int k=0; k<4; ++k) {
3519
0
            uint16_t q = (aux8[k] - 1)/2;
3520
0
            index |= (q << 3*k);
3521
0
        }
3522
0
        kmap_q3xs[index] = i;
3523
0
    }
3524
0
    int8_t pos[4];
3525
0
    int * dist2 = (int *)malloc(2*grid_size*sizeof(int));
3526
0
    int num_neighbors = 0, num_not_in_map = 0;
3527
0
    for (int i = 0; i < kmap_size; ++i) {
3528
0
        if (kmap_q3xs[i] >= 0) continue;
3529
0
        ++num_not_in_map;
3530
0
        for (int k = 0; k < 4; ++k) {
3531
0
            int l = (i >> 3*k) & 0x7;
3532
0
            pos[k] = 2*l + 1;
3533
0
        }
3534
0
        for (int j = 0; j < grid_size; ++j) {
3535
0
            const int8_t * pg = (const int8_t *)(kgrid_q3xs + j);
3536
0
            int d2 = 0;
3537
0
            for (int k = 0; k < 4; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
3538
0
            dist2[2*j+0] = d2;
3539
0
            dist2[2*j+1] = j;
3540
0
        }
3541
0
        qsort(dist2, grid_size, 2*sizeof(int), iq3_compare_func);
3542
0
        int n = 0; int d2 = dist2[0];
3543
0
        int nhave = 1;
3544
0
        for (int j = 0; j < grid_size; ++j) {
3545
0
            if (dist2[2*j] > d2) {
3546
0
                if (nhave == nwant) break;
3547
0
                d2 = dist2[2*j];
3548
0
                ++nhave;
3549
0
            }
3550
0
            ++n;
3551
0
        }
3552
0
        num_neighbors += n;
3553
0
    }
3554
    //printf("%s: %d neighbours in total\n", __func__, num_neighbors);
3555
0
    kneighbors_q3xs = (uint16_t *)malloc((num_neighbors + num_not_in_map)*sizeof(uint16_t));
3556
0
    iq3_data[gindex].neighbours = kneighbors_q3xs;
3557
0
    int counter = 0;
3558
0
    for (int i = 0; i < kmap_size; ++i) {
3559
0
        if (kmap_q3xs[i] >= 0) continue;
3560
0
        for (int k = 0; k < 4; ++k) {
3561
0
            int l = (i >> 3*k) & 0x7;
3562
0
            pos[k] = 2*l + 1;
3563
0
        }
3564
0
        for (int j = 0; j < grid_size; ++j) {
3565
0
            const int8_t * pg = (const int8_t *)(kgrid_q3xs + j);
3566
0
            int d2 = 0;
3567
0
            for (int k = 0; k < 4; ++k) d2 += (pg[k] - pos[k])*(pg[k] - pos[k]);
3568
0
            dist2[2*j+0] = d2;
3569
0
            dist2[2*j+1] = j;
3570
0
        }
3571
0
        qsort(dist2, grid_size, 2*sizeof(int), iq3_compare_func);
3572
0
        kmap_q3xs[i] = -(counter + 1);
3573
0
        int d2 = dist2[0];
3574
0
        uint16_t * start = &kneighbors_q3xs[counter++];
3575
0
        int n = 0, nhave = 1;
3576
0
        for (int j = 0; j < grid_size; ++j) {
3577
0
            if (dist2[2*j] > d2) {
3578
0
                if (nhave == nwant) break;
3579
0
                d2 = dist2[2*j];
3580
0
                ++nhave;
3581
0
            }
3582
0
            kneighbors_q3xs[counter++] = dist2[2*j+1];
3583
0
            ++n;
3584
0
        }
3585
0
        *start = n;
3586
0
    }
3587
0
    free(dist2);
3588
0
}
3589
3590
872
void iq3xs_free_impl(int grid_size) {
3591
872
    GGML_ASSERT(grid_size == 256 || grid_size == 512);
3592
872
    const int gindex = iq3_data_index(grid_size);
3593
872
    if (iq3_data[gindex].grid) {
3594
0
        free(iq3_data[gindex].grid);       iq3_data[gindex].grid = NULL;
3595
0
        free(iq3_data[gindex].map);        iq3_data[gindex].map  = NULL;
3596
0
        free(iq3_data[gindex].neighbours); iq3_data[gindex].neighbours = NULL;
3597
0
    }
3598
872
}
3599
3600
// Picks, among the candidate grid points listed in `neighbours`, the one with the smallest weighted
// squared error sum_i weight[i]*(scale*g[i] - xval[i])^2 over its four coordinates.
//
//   neighbours - neighbours[0] is the candidate count (must be > 0), followed by that many grid indices
//   grid       - grid points, four int8 coordinates per 32-bit entry
//   xval       - the four values to approximate
//   weight     - the four per-value weights
//   scale      - factor applied to the grid coordinates
//   L          - output: the four levels (g[i]-1)/2 of the chosen grid point
//
// Returns the index of the chosen grid point.
static int iq3_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint32_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, int8_t * GGML_RESTRICT L) {
    const int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);

    int   grid_index = -1;
    float lowest_err = FLT_MAX;
    const uint16_t * candidate = neighbours + 1;
    const uint16_t * const last = neighbours + num_neighbors;
    for (; candidate <= last; ++candidate) {
        const int8_t * coords = (const int8_t *)(grid + *candidate);
        float err = 0;
        for (int i = 0; i < 4; ++i) {
            float q = coords[i];
            float diff = scale*q - xval[i];
            err += weight[i]*diff*diff;
        }
        if (err < lowest_err) {
            lowest_err = err;
            grid_index = *candidate;
        }
    }
    GGML_ASSERT(grid_index >= 0);

    const int8_t * chosen = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 4; ++i) {
        L[i] = (chosen[i] - 1)/2;
    }
    return grid_index;
}
3623
3624
// Quantizes one row of `n` floats (`n` must be a multiple of QK_K) into 3-bit grid blocks written to `vy`.
//
//   grid_size     - 256 selects the block_iq3_xxs layout, anything else is treated as block_iq3_s
//                   (iq3_data_index() only accepts 256 or 512)
//   x             - input values
//   vy            - output blocks
//   n             - number of input values
//   quant_weights - optional per-value importance weights; when NULL, x[i]^2 is used as the weight
//
// The lookup tables for `grid_size` must already exist (see the ggml_quantize_init() hint in the asserts).
//
// Per 32-value sub-block the routine: strips signs (forcing an even number of sign flips per group of 8,
// so only 7 sign bits are stored), searches 31 candidate scales, snaps every group of 4 levels onto the
// grid (via the neighbour table when the rounded cell is not a grid point), and keeps the best
// weighted-least-squares scale. The per-sub-block scales are then encoded as 4-bit values relative to the
// block scale `d`.
static void quantize_row_iq3_xxs_impl(int grid_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
        const float * GGML_RESTRICT quant_weights) {

    const int gindex = iq3_data_index(grid_size);

    const uint32_t * kgrid_q3xs      = iq3_data[gindex].grid;
    const int      * kmap_q3xs       = iq3_data[gindex].map;
    const uint16_t * kneighbors_q3xs = iq3_data[gindex].neighbours;

    // quant_weights is allowed to be NULL
    GGML_ASSERT(kgrid_q3xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q3xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q3xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 8;

    const int64_t nbl = n/QK_K;

    ggml_fp16_t * dh;
    uint8_t * qs;
    int block_size;
    if (grid_size == 256) {
        block_iq3_xxs * y = vy;
        dh = &y->d;
        qs = y->qs;
        block_size = sizeof(block_iq3_xxs);
    } else {
        block_iq3_s * y = vy;
        dh = &y->d;
        qs = y->qs;
        block_size = sizeof(block_iq3_s);
    }
    int quant_size = block_size - sizeof(ggml_fp16_t);

    float scales[QK_K/32];
    float weight[32];
    float xval[32];
    int8_t L[32];
    int8_t Laux[32];
    float  waux[32];
    bool   is_on_grid[8];
    bool   is_on_grid_aux[8];
    uint8_t block_signs[8];
    uint8_t q3[3*(QK_K/8)+QK_K/32];
    // Packed signs + 4-bit scale, one word per sub-block. Kept in a real uint32_t array (instead of a
    // uint32_t pointer into the byte buffer q3) and copied into q3 at offset QK_K/4 just before the block
    // is emitted; this avoids misaligned and type-punned accesses while producing the same bytes.
    uint32_t scales_and_signs[QK_K/32];
    uint8_t  * qh = q3 + 3*(QK_K/8);

    for (int64_t ibl = 0; ibl < nbl; ++ibl) {

        dh[0] = GGML_FP32_TO_FP16(0.f);
        memset(q3, 0, 3*QK_K/8+QK_K/32);
        memset(scales_and_signs, 0, sizeof(scales_and_signs));

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = 2*sumx2/QK_K;

        for (int ib = 0; ib < QK_K/32; ++ib) {
            const float * xb = xbl + 32*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + 32*ib;
                for (int i = 0; i < 32; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < 32; ++i) weight[i] = xb[i]*xb[i];
            }
            for (int i = 0; i < 32; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < 4; ++k) {
                int nflip = 0;
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; ++nflip; s |= (1 << i);
                    }
                }
                if (nflip%2) {
                    // odd number of flips: undo/add a flip on the least important value of the group
                    int imin = 0; float min = weight[8*k+imin]*xb[8*k+imin]*xb[8*k+imin];
                    for (int i = 1; i < 8; ++i) {
                        float ax = weight[8*k+i]*xb[8*k+i]*xb[8*k+i];
                        if (ax < min) {
                            min = ax; imin = i;
                        }
                    }
                    xval[8*k+imin] = -xval[8*k+imin];
                    s ^= (1 << imin);
                }
                block_signs[k] = s & 127;
            }
            float max = xval[0];
            for (int i = 1; i < 32; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS_IQ3_XXS) {
                scales[ib] = 0;
                memset(L, 0, 32);
                continue;
            }
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            for (int k = 0; k < 8; ++k) is_on_grid[k] = true;
            for (int is = -15; is <= 15; ++is) {
                float id = (2*kMaxQ-1+is*0.2f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < 8; ++k) {
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        Laux[4*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) u |= (Laux[4*k+i] << 3*i);
                    int grid_index = kmap_q3xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        is_on_grid_aux[k] = false;
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, this_scale, Laux + 4*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < 32; ++i) L[i] = Laux[i];
                    for (int k = 0; k <  8; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < 8; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                // re-snap the groups that were off-grid using the final scale, then refit the scale
                float id = 1/scale;
                for (int k = 0; k < 8; ++k) {
                    if (is_on_grid[k]) continue;
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 3*i);
                    }
                    int grid_index = kmap_q3xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, scale, L + 4*k);
                    }
                    const int8_t * pg = (const int8_t *)(kgrid_q3xs + grid_index);
                    for (int i = 0; i < 4; ++i) L[4*k+i] = (pg[i] - 1)/2;
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 32; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // This should never happen, but just in case, flip scale so that it is positive (we use uint's to encode the scale)
                // and correspondingly flip quant signs.
                scale = -scale;
                for (int k = 0; k < 4; ++k) block_signs[k] = (~block_signs[k]) & 127;
            }
            for (int k = 0; k < 8; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 4; ++i) u |= (L[4*k+i] << 3*i);
                int grid_index = kmap_q3xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                if (grid_size == 256) {
                    q3[8*ib+k] = grid_index;
                } else {
                    q3[8*ib+k] = grid_index & 255;
                    qh[ib] |= ((grid_index >> 8) << k);
                }

            }
            scales_and_signs[ib] = block_signs[0] | (block_signs[1] << 7) | (block_signs[2] << 14) | (block_signs[3] << 21);
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            memset(qs, 0, quant_size);
            dh += block_size/sizeof(ggml_fp16_t);
            qs += block_size;
            continue;
        }

        float d = max_scale/31;
        dh[0] = GGML_FP32_TO_FP16(d * 1.0125f);  // small improvement via this fudge factor
        float id = 1/d;
        for (int ib = 0; ib < QK_K/32; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            scales_and_signs[ib] |= ((uint32_t)l << 28);
        }
        // place the packed words where the original pointer alias wrote them (byte offset QK_K/4)
        memcpy(q3 + QK_K/4, scales_and_signs, sizeof(scales_and_signs));
        memcpy(qs, q3, quant_size);

        dh += block_size/sizeof(ggml_fp16_t);
        qs += block_size;

    }
}
3837
3838
0
// Quantizes `nrow` rows of `n_per_row` floats from `src` into IQ3_XXS blocks (256-point grid) laid out
// back to back in `dst`. `n_per_row` must be a multiple of QK_K. `quant_weights` is handed unchanged to
// the per-row routine for every row. Returns the number of bytes produced.
size_t quantize_iq3_xxs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);
    const int64_t blocks_per_row = n_per_row/QK_K;
    const size_t  row_bytes      = blocks_per_row*sizeof(block_iq3_xxs);
    char * out = (char *)dst;
    for (int64_t r = 0; r < nrow; ++r) {
        quantize_row_iq3_xxs_impl(256, src + r*n_per_row, out + r*row_bytes, n_per_row, quant_weights);
    }
    return nrow * blocks_per_row * sizeof(block_iq3_xxs);
}
3849
3850
0
// Reference row quantizer for IQ3_XXS: quantizes `k` floats (a multiple of QK_K) from `x` into `y`
// using the 256-point grid and no importance weights.
void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);
    const float * no_weights = NULL;
    quantize_row_iq3_xxs_impl(256, x, y, k, no_weights);
}
3854
3855
// Quantizes one row of `n` floats (`n` must be a multiple of QK_K) into block_iq3_s blocks at `vy`,
// using the 512-point grid tables (which must already be initialised).
//
//   block_size    - number of values that share one scale (sub-block length)
//   x             - input values
//   vy            - output, an array of block_iq3_s
//   n             - number of input values
//   quant_weights - optional per-value importance weights; when NULL, x[i]^2 is used as the weight
//   scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs
//                 - caller-provided scratch buffers; `scales` needs QK_K/block_size entries,
//                   weight/xval/L/Laux/waux need block_size entries, is_on_grid/is_on_grid_aux need
//                   block_size/4 entries and block_signs needs block_size/8 entries
//
// For each sub-block the routine strips signs (one sign bit per value), searches 19 candidate scales,
// snaps every group of 4 levels onto the grid, and keeps the best weighted-least-squares scale. The
// sub-block scales are finally packed two per byte as 4-bit values relative to the block scale `d`.
//
// The output position of every sub-block is derived from its index `ib`, so a sub-block that is skipped
// because all its values are zero leaves its (already zeroed) slot in place and does not shift the
// sub-blocks that follow it.
static void quantize_row_iq3_s_impl(int block_size, const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n,
        const float * GGML_RESTRICT quant_weights,
        float   * scales,
        float   * weight,
        float   * xval,
        int8_t  * L,
        int8_t  * Laux,
        float   * waux,
        bool    * is_on_grid,
        bool    * is_on_grid_aux,
        uint8_t * block_signs) {

    const int gindex = iq3_data_index(512);

    const uint32_t * kgrid_q3xs      = iq3_data[gindex].grid;
    const int      * kmap_q3xs       = iq3_data[gindex].map;
    const uint16_t * kneighbors_q3xs = iq3_data[gindex].neighbours;

    // quant_weights is allowed to be NULL
    GGML_ASSERT(kgrid_q3xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q3xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q3xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    const int kMaxQ = 8;

    const int64_t nbl = n/QK_K;

    block_iq3_s * y = vy;

    const int bs4 = block_size/4;   // groups of 4 values (one grid point each) per sub-block
    const int bs8 = block_size/8;   // sign bytes per sub-block

    for (int64_t ibl = 0; ibl < nbl; ++ibl) {

        memset(&y[ibl], 0, sizeof(block_iq3_s));
        y[ibl].d = GGML_FP32_TO_FP16(0.f);

        uint8_t * qh = y[ibl].qh;

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = 2*sumx2/QK_K;

        for (int ib = 0; ib < QK_K/block_size; ++ib) {
            // output slots of this sub-block; computed from ib so skipped sub-blocks cannot shift later ones
            uint8_t * qs    = y[ibl].qs    + ib*bs4;
            uint8_t * signs = y[ibl].signs + ib*bs8;

            const float * xb = xbl + block_size*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + block_size*ib;
                for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
            }
            for (int i = 0; i < block_size; ++i) waux[i] = sqrtf(weight[i]);
            for (int k = 0; k < bs8; ++k) {
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; s |= (1 << i);
                    }
                }
                block_signs[k] = s;
            }
            float max = xval[0];
            for (int i = 1; i < block_size; ++i) max = MAX(max, xval[i]);
            if (!max) {
                // all-zero sub-block: its qs/signs/qh bits stay at the zeros written by the memset above
                scales[ib] = 0;
                continue;
            }
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            for (int k = 0; k < bs4; ++k) is_on_grid[k] = false;
            for (int is = -9; is <= 9; ++is) {
                float id = (2*kMaxQ-1+is*0.2f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < bs4; ++k) {
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        Laux[4*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) u |= (Laux[4*k+i] << 3*i);
                    int grid_index = kmap_q3xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        is_on_grid_aux[k] = false;
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, this_scale, Laux + 4*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < block_size; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < block_size; ++i) L[i] = Laux[i];
                    for (int k = 0; k < bs4; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < bs4; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                // re-snap every group with the final scale (on-grid groups included), then refit the scale
                float id = 1/scale;
                for (int k = 0; k < bs4; ++k) {
                    uint16_t u = 0;
                    for (int i = 0; i < 4; ++i) {
                        int l = nearest_int(0.5f*(id*xval[4*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 3*i);
                    }
                    int grid_index = kmap_q3xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q3xs - kmap_q3xs[u] - 1;
                        grid_index = iq3_find_best_neighbour(neighbours, kgrid_q3xs, xval + 4*k, waux + 4*k, scale, L + 4*k);
                    }
                    const int8_t * pg = (const int8_t *)(kgrid_q3xs + grid_index);
                    for (int i = 0; i < 4; ++i) L[4*k+i] = (pg[i] - 1)/2;
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < block_size; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // This should never happen, but just in case, flip scale so that it is positive (we use uint's to encode the scale)
                // and correspondingly flip quant signs.
                scale = -scale;
                for (int k = 0; k < bs8; ++k) block_signs[k] = ~block_signs[k];
            }
            for (int k = 0; k < bs4; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 4; ++i) u |= (L[4*k+i] << 3*i);
                int grid_index = kmap_q3xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 4; ++i) printf(" %d", L[4*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                // low 8 bits of the grid index go to qs, the 9th bit to the matching bit of qh
                qs[k] = grid_index & 255;
                qh[(ib*bs4+k)/8] |= ((grid_index >> 8) << ((ib*bs4+k)%8));
            }
            for (int k = 0; k < bs8; ++k) signs[k] = block_signs[k];
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            continue;
        }

        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d * 1.033f);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/block_size; ib += 2) {
            int l1 = nearest_int(0.5f*(id*scales[ib+0]-1));
            l1 = MAX(0, MIN(15, l1));
            int l2 = nearest_int(0.5f*(id*scales[ib+1]-1));
            l2 = MAX(0, MIN(15, l2));
            y[ibl].scales[ib/2] = l1 | (l2 << 4);
        }

    }
}
4036
4037
0
#define IQ3S_BLOCK_SIZE 32
4038
0
size_t quantize_iq3_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
4039
0
    GGML_ASSERT(n_per_row%QK_K == 0);
4040
0
    int64_t nblock = n_per_row/QK_K;
4041
0
    float scales[QK_K/IQ3S_BLOCK_SIZE];
4042
0
    float weight[IQ3S_BLOCK_SIZE];
4043
0
    float xval[IQ3S_BLOCK_SIZE];
4044
0
    int8_t L[IQ3S_BLOCK_SIZE];
4045
0
    int8_t Laux[IQ3S_BLOCK_SIZE];
4046
0
    float  waux[IQ3S_BLOCK_SIZE];
4047
0
    bool   is_on_grid[IQ3S_BLOCK_SIZE/4];
4048
0
    bool   is_on_grid_aux[IQ3S_BLOCK_SIZE/4];
4049
0
    uint8_t block_signs[IQ3S_BLOCK_SIZE/8];
4050
0
    char * qrow = (char *)dst;
4051
0
    for (int64_t row = 0; row < nrow; ++row) {
4052
0
        quantize_row_iq3_s_impl(IQ3S_BLOCK_SIZE, src, qrow, n_per_row, quant_weights,
4053
0
                scales, weight, xval, L, Laux, waux, is_on_grid, is_on_grid_aux, block_signs);
4054
0
        src += n_per_row;
4055
0
        qrow += nblock*sizeof(block_iq3_s);
4056
0
    }
4057
0
    return nrow * nblock * sizeof(block_iq3_s);
4058
0
}
4059
4060
0
// Reference single-row entry point for IQ3_S: quantizes `k` floats from `x`
// into `y` by delegating to quantize_iq3_s() with one row and no importance
// weights. `k` must be a multiple of QK_K (checked with assert).
void quantize_row_iq3_s_ref(const float * GGML_RESTRICT x, block_iq3_s * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);

    const int64_t single_row = 1;
    quantize_iq3_s(x, y, single_row, k, NULL);
}
4064
4065
4066
// =================================== 1.5 bpw ===================================================
4067
4068
// Chooses a grid point for a group of 8 values when the scale is a free parameter.
//
// neighbours : neighbours[0] is the candidate count, followed by that many grid indices
// grid       : grid table; each 64-bit entry is read as 8 int8 codes
// xval       : the 8 input values
// weight     : the 8 per-value weights
// scale      : out - best-fit scale sumqx/sumq2 of the winner, multiplied by 1.05
// L          : out - per-value levels (code - 1)/2 of the winner
// ngrid      : number of grid entries scanned by the fallback search
//
// A candidate is scored with q = (code - 3)/2 (integer division) and is only
// eligible when both sumqx and sumq2 are positive; the winner maximises
// sumqx^2/sumq2. If no listed neighbour is eligible, the whole grid is scanned
// with the same criterion. If that also fails, diagnostics are printed and the
// GGML_ASSERT fires. Returns the chosen grid index.
static int iq1_find_best_neighbour(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float * scale, int8_t * GGML_RESTRICT L, int ngrid) {
    const int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);

    float top_score = -FLT_MAX;
    int   chosen    = -1;

    // Pass 1: restrict the search to the listed neighbours.
    for (int n = 1; n <= num_neighbors; ++n) {
        const int8_t * cand = (const int8_t *)(grid + neighbours[n]);
        float num = 0, den = 0;
        for (int e = 0; e < 8; ++e) {
            const float q = (cand[e] - 3)/2;
            const float w = weight[e];
            num += w*q*xval[e];
            den += w*q*q;
        }
        if (num > 0 && den > 0 && num*num > top_score*den) {
            *scale    = num/den;
            top_score = *scale * num;
            chosen    = neighbours[n];
        }
    }

    // Pass 2: nothing eligible among the neighbours, so try every grid point.
    if (chosen < 0) {
        for (int g = 0; g < ngrid; ++g) {
            const int8_t * cand = (const int8_t *)(grid + g);
            float num = 0, den = 0;
            for (int e = 0; e < 8; ++e) {
                const float w = weight[e];
                const float q = (cand[e] - 3)/2;
                num += w*q*xval[e];
                den += w*q*q;
            }
            if (num > 0 && den > 0 && num*num > top_score*den) {
                *scale    = num/den;
                top_score = *scale*num;
                chosen    = g;
            }
        }
    }

    // Still nothing: dump the per-neighbour sums before the assertion below fires.
    if (chosen < 0) {
        printf("Oops, did not find grid point\n");
        printf("Have %d neighbours\n", num_neighbors);
        for (int n = 1; n <= num_neighbors; ++n) {
            const int8_t * cand = (const int8_t *)(grid + neighbours[n]);
            float num = 0, den = 0;
            for (int e = 0; e < 8; ++e) {
                const float q = (cand[e] - 3)/2;
                const float w = weight[e];
                num += w*q*xval[e];
                den += w*q*q;
            }
            printf("    neighbour %d: sumqx = %g sumq2 = %g\n", n, (double)num, (double)den);
        }
    }
    GGML_ASSERT(chosen >= 0);

    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    *scale *= 1.05f;  // This is a fudge factor. Don't ask me why it improves the result.
    //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    const int8_t * winner = (const int8_t *)(grid + chosen);
    for (int e = 0; e < 8; ++e) {
        L[e] = (winner[e] - 1)/2;
    }
    return chosen;
}
4127
4128
// Chooses a grid point for a group of 8 values when the scale is already fixed.
//
// neighbours : neighbours[0] is the candidate count, followed by that many grid indices
// grid       : grid table; each 64-bit entry is read as 8 int8 codes
// xval       : the 8 input values
// weight     : the 8 per-value weights
// scale      : fixed scale applied to the quant values
// xg         : lookup table indexed by level (code - 1)/2 giving the quant value
// L          : out - per-value levels (code - 1)/2 of the winner
// ngrid      : number of grid entries scanned by the fallback search
//
// The winner minimises the weighted squared error sum(w * (scale*q - x)^2).
// If no listed neighbour produced a distance below FLT_MAX, the whole grid is
// scanned with the same metric. If that also fails, diagnostics are printed
// and the GGML_ASSERT fires. Returns the chosen grid index.
static int iq1_find_best_neighbour2(const uint16_t * GGML_RESTRICT neighbours, const uint64_t * GGML_RESTRICT grid,
        const float * GGML_RESTRICT xval, const float * GGML_RESTRICT weight, float scale, const float * GGML_RESTRICT xg, int8_t * GGML_RESTRICT L, int ngrid) {
    int num_neighbors = neighbours[0];
    GGML_ASSERT(num_neighbors > 0);
    float best_score = FLT_MAX;
    int grid_index = -1;
    for (int j = 1; j <= num_neighbors; ++j) {
        const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
        float d2 = 0;
        for (int i = 0; i < 8; ++i) {
            float q = xg[(pg[i] - 1)/2];
            float w = weight[i];
            float diff = scale*q - xval[i];
            d2 += w*diff*diff;
        }
        if (d2 < best_score) {
            best_score = d2;
            grid_index = neighbours[j];
        }
    }
    if (grid_index < 0) {
        // Fallback: exhaustive search over the full grid.
        for (int i = 0; i < ngrid; ++i) {
            const int8_t * grid_i = (const int8_t *)(grid + i);
            float d2 = 0;
            for (int j = 0; j < 8; ++j) {
                float w = weight[j];
                float q = xg[(grid_i[j] - 1)/2];
                // Index xval by the element j, not by the grid index i: xval only
                // holds 8 values, so xval[i] read out of bounds and compared every
                // element against the wrong input value.
                float diff = scale*q - xval[j];
                d2 += w*diff*diff;
            }
            if (d2 < best_score) {
                best_score = d2;
                grid_index = i;
            }
        }
    }
    if (grid_index < 0) {
        // Still nothing: dump the per-neighbour sums before the assertion below fires.
        printf("Oops, did not find grid point\n");
        printf("Have %d neighbours\n", num_neighbors);
        for (int j = 1; j <= num_neighbors; ++j) {
            const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
            float sumqx = 0, sumq2 = 0;
            for (int i = 0; i < 8; ++i) {
                float q = xg[(pg[i] - 1)/2];
                float w = weight[i];
                sumqx += w*q*xval[i];
                sumq2 += w*q*q;
            }
            printf("    neighbour %d: sumqx = %g sumq2 = %g\n", j, (double)sumqx, (double)sumq2);
        }
    }
    GGML_ASSERT(grid_index >= 0);
    const int8_t * pg = (const int8_t *)(grid + grid_index);
    for (int i = 0; i < 8; ++i) L[i] = (pg[i] - 1)/2;
    return grid_index;
}
4184
4185
0
// qsort comparator: orders elements by the float stored at the start of each
// element, ascending. Returns -1, 1 or 0; 0 is also returned when neither
// comparison holds (e.g. a NaN operand).
static int iq1_sort_helper(const void * left, const void * right) {
    const float lhs = *(const float *)left;
    const float rhs = *(const float *)right;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
4190
4191
0
#define IQ1S_BLOCK_SIZE 32
4192
0
#define IQ1M_BLOCK_SIZE 16
4193
static void quantize_row_iq1_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
4194
        float    * scales,
4195
        float    * weight,
4196
        float    * sumx,
4197
        float    * sumw,
4198
        float    * pairs,
4199
        int8_t   * L,
4200
        uint16_t * index,
4201
0
        int8_t   * shifts) {
4202
4203
0
    const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
4204
4205
0
    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
4206
0
    const int      * kmap_q2xs       = iq2_data[gindex].map;
4207
0
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
4208
4209
0
    GGML_ASSERT(quant_weights   && "missing quantization weights");
4210
0
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
4211
0
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
4212
0
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
4213
0
    GGML_ASSERT(n%QK_K == 0);
4214
4215
0
    block_iq1_s * y = vy;
4216
4217
0
    const int64_t nbl = n/QK_K;
4218
4219
0
    const int block_size = IQ1S_BLOCK_SIZE;
4220
4221
0
    const float x_p[3] = {-1 + IQ1S_DELTA,  IQ1S_DELTA, 1 + IQ1S_DELTA};
4222
0
    const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
4223
4224
4225
0
    int * idx = (int *)(pairs + 1);
4226
4227
0
    for (int ibl = 0; ibl < nbl; ++ibl) {
4228
4229
0
        y[ibl].d = GGML_FP32_TO_FP16(0.f);
4230
0
        memset(y[ibl].qs, 0, QK_K/8);
4231
0
        memset(y[ibl].qh, 0, QK_K/16);
4232
4233
0
        float max_scale = 0;
4234
4235
0
        const float * xbl = x + QK_K*ibl;
4236
0
        float sumx2 = 0;
4237
0
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
4238
0
        float sigma2 = 2*sumx2/QK_K;
4239
4240
0
        for (int ib = 0; ib < QK_K/block_size; ++ib) {
4241
0
            const float * xb = xbl + block_size*ib;
4242
0
            const float * qw = quant_weights + QK_K*ibl + block_size*ib;
4243
0
            for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
4244
0
            float max = fabsf(xb[0]);
4245
0
            for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
4246
0
            if (max < GROUP_MAX_EPS_IQ1_S) {
4247
0
                scales[ib] = 0;
4248
0
                memset(L, 1, block_size);
4249
0
                continue;
4250
0
            }
4251
            // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
4252
            // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
4253
            // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
4254
            // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
4255
            // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
4256
            // for each possible and score for each split.
4257
0
            for (int j = 0; j < block_size; ++j) {
4258
0
                pairs[2*j] = xb[j];
4259
0
                idx[2*j] = j;
4260
0
            }
4261
0
            qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
4262
0
            {
4263
0
                sumx[0] = sumw[0] = 0;
4264
0
                for (int j = 0; j < block_size; ++j) {
4265
0
                    int i = idx[2*j];
4266
0
                    sumx[j+1] = sumx[j] + weight[i]*xb[i];
4267
0
                    sumw[j+1] = sumw[j] + weight[i];
4268
0
                }
4269
0
            }
4270
0
            float best_score = -FLT_MAX, scale = max;
4271
0
            int besti1 = -1, besti2 = -1, best_shift = 0;
4272
0
            for (int i1 = 0; i1 <= block_size; ++i1) {
4273
0
                for (int i2 = i1; i2 <= block_size; ++i2) {
4274
0
                    float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
4275
0
                    float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
4276
0
                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
4277
0
                        scale = sumqx/sumq2; best_score = scale*sumqx;
4278
0
                        besti1 = i1; besti2 = i2; best_shift = 1;
4279
0
                    }
4280
0
                    sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
4281
0
                    sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
4282
0
                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
4283
0
                        scale = sumqx/sumq2; best_score = scale*sumqx;
4284
0
                        besti1 = i1; besti2 = i2; best_shift = -1;
4285
0
                    }
4286
0
                }
4287
0
            }
4288
0
            GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
4289
0
            for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0;
4290
0
            for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
4291
0
            for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
4292
0
            if (scale < 0) {
4293
0
                for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
4294
0
                scale = -scale; best_shift = -best_shift;
4295
0
            }
4296
0
            bool all_on_grid = true;
4297
0
            const float * xx = best_shift == 1 ? x_p : x_m;
4298
0
            for (int k = 0; k < block_size/8; ++k) {
4299
0
                uint16_t u = 0;
4300
0
                for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
4301
0
                int grid_index = kmap_q2xs[u];
4302
0
                if (grid_index < 0) {
4303
0
                    all_on_grid = false;
4304
0
                    const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
4305
0
                    grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
4306
0
                    GGML_ASSERT(grid_index >= 0);
4307
0
                }
4308
0
                index[k] = grid_index;
4309
0
            }
4310
0
            if (!all_on_grid) {
4311
0
                float sumqx = 0, sumq2 = 0;
4312
0
                for (int k = 0; k < block_size/8; ++k) {
4313
0
                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
4314
0
                    for (int j = 0; j < 8; ++j) {
4315
0
                        float w = weight[8*k + j];
4316
0
                        float q = xx[(pg[j] - 1)/2];
4317
0
                        sumqx += w*q*xb[8*k+j];
4318
0
                        sumq2 += w*q*q;
4319
0
                    }
4320
0
                }
4321
0
                if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
4322
0
            }
4323
0
            uint16_t h = 0;
4324
0
            for (int k = 0; k < block_size/8; ++k) {
4325
0
                y[ibl].qs[(block_size/8)*ib + k] = index[k] & 255;
4326
0
                h |= (index[k] >> 8) << 3*k;
4327
0
            }
4328
0
            y[ibl].qh[ib] = h;
4329
0
            GGML_ASSERT(scale >= 0);
4330
0
            scales[ib] = scale;
4331
0
            shifts[ib] = best_shift;
4332
0
            max_scale = MAX(max_scale, scale);
4333
0
        }
4334
4335
0
        if (!max_scale) {
4336
0
            continue;
4337
0
        }
4338
4339
0
        float d = max_scale/15;
4340
0
        y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.125f is another fudge factor. Don't ask me why it is needed.
4341
0
        float id = 1/d;
4342
0
        for (int ib = 0; ib < QK_K/block_size; ++ib) {
4343
0
            int l = nearest_int(0.5f*(id*scales[ib]-1));
4344
0
            l = MAX(0, MIN(7, l));
4345
0
            if (shifts[ib] == -1) l |= 8;
4346
0
            y[ibl].qh[ib] |= (l << 12);
4347
0
        }
4348
0
    }
4349
0
}
4350
4351
0
// Quantizes `nrow` consecutive rows of `n_per_row` floats from `src` into
// block_iq1_s data written to `dst`, one row at a time.
//
// n_per_row must be a multiple of QK_K (asserted). The same `quant_weights`
// pointer is handed to the row quantizer for every row. Returns the number of
// bytes written, nrow * (n_per_row/QK_K) * sizeof(block_iq1_s).
size_t quantize_iq1_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);

    // Scratch storage for the row quantizer; allocated once and reused for every row.
    float    scales[QK_K/IQ1S_BLOCK_SIZE];
    int8_t   shifts[QK_K/IQ1S_BLOCK_SIZE];
    float    weight[IQ1S_BLOCK_SIZE];
    float    sumx[IQ1S_BLOCK_SIZE+1];
    float    sumw[IQ1S_BLOCK_SIZE+1];
    float    pairs[2*IQ1S_BLOCK_SIZE];
    int8_t   L[IQ1S_BLOCK_SIZE];
    uint16_t index[IQ1S_BLOCK_SIZE/8];

    const int64_t nblock = n_per_row/QK_K;   // super-blocks per row
    char * out = (char *)dst;

    for (int64_t remaining = nrow; remaining > 0; --remaining) {
        quantize_row_iq1_s_impl(src, out, n_per_row, quant_weights, scales, weight, sumx, sumw, pairs, L, index, shifts);
        src += n_per_row;
        out += nblock*sizeof(block_iq1_s);
    }

    return nrow * nblock * sizeof(block_iq1_s);
}
4370
4371
// Quantizes one row of `n` floats (`n` a multiple of QK_K) into block_iq1_m
// data at `vy`. `quant_weights` may be NULL, in which case x[i]^2 is used as
// the per-value weight.
//
// Scratch buffers supplied by the caller:
//   scales : QK_K/IQ1M_BLOCK_SIZE floats, per sub-block scale
//   weight : IQ1M_BLOCK_SIZE floats, per-value weights of the current sub-block
//   pairs  : 2*IQ1M_BLOCK_SIZE floats, (value, index) pairs used for sorting
//   L      : IQ1M_BLOCK_SIZE levels (0, 1, 2) of the current sub-block
//   index  : IQ1M_BLOCK_SIZE/8 grid indices of the current sub-block
//   shifts : QK_K/IQ1M_BLOCK_SIZE entries, delta-sign combination (0..3) per sub-block
//
// Each sub-block of 16 values is split into two groups of 8, and each group
// has its own delta sign. The four sign combinations are numbered
//   0: +, +    1: +, -    2: -, +    3: -, -
// (first group, second group).
static void quantize_row_iq1_m_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights,
        float    * scales,
        float    * weight,
        float    * pairs,
        int8_t   * L,
        uint16_t * index,
        int8_t   * shifts) {

    const int gindex = iq2_data_index(GGML_TYPE_IQ1_M);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    // quant_weights is intentionally not asserted: NULL selects the x^2 weighting below.
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    block_iq1_m * y = vy;

    const int64_t nbl = n/QK_K;

    const int block_size = IQ1M_BLOCK_SIZE;

    // Quant values for levels 0, 1, 2 with a positive (x_p) or negative (x_m) delta.
    const float x_p[3] = {-1 + IQ1M_DELTA,  IQ1M_DELTA, 1 + IQ1M_DELTA};
    const float x_m[3] = {-1 - IQ1M_DELTA, -IQ1M_DELTA, 1 - IQ1M_DELTA};
    // qh bits OR-ed in for each sign combination; indexed by shifts[ib] (0..3).
    const uint8_t masks[4] = {0x00, 0x80, 0x08, 0x88};

    // pairs is laid out as (float value, int original index) pairs: idx aliases the
    // odd slots, so qsort with an element size of 2*sizeof(float) carries the index along.
    int * idx = (int *)(pairs + 1);

    float sumqx[4], sumq2[4];

    iq1m_scale_t s;
    const float * xx;

    for (int ibl = 0; ibl < nbl; ++ibl) {
        memset(y[ibl].qs, 0, QK_K/8);
        memset(y[ibl].qh, 0, QK_K/16);
        memset(y[ibl].scales, 0, QK_K/32);

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        float sigma2 = 2*sumx2/QK_K;

        for (int ib = 0; ib < QK_K/block_size; ++ib) {
            const float * xb = xbl + block_size*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + block_size*ib;
                for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
            }
            float max = fabsf(xb[0]);
            for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
            if (max < GROUP_MAX_EPS_IQ1_M) {
                scales[ib] = 0;
                // shifts[ib] is used as an index into masks[] in the packing pass below
                // whenever any other sub-block of this super-block has a non-zero scale.
                // Give it a defined, in-range value so that pass never indexes with
                // uninitialized or stale scratch contents.
                shifts[ib] = 0;
                memset(L, 1, block_size);
                continue;
            }
            // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
            // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
            // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
            // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
            // Wi = sum[weight[j], j = 0...i], and use these to quickly get the optimum scale
            // and score for each possible split.
            for (int j = 0; j < block_size; ++j) {
                pairs[2*j] = xb[j];
                idx[2*j] = j;
            }
            qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
            float best_score = -FLT_MAX, scale = max;
            int besti1 = -1, besti2 = -1, best_k = -1;
            // 0: +, +
            // 1: +, -
            // 2: -, +
            // 3: -, -
            // Sorted positions [0, i1) get level 0, [i1, i2) level 1, [i2, block_size) level 2.
            // For every split the sums are accumulated for all four sign combinations at once;
            // values from the first half of the sub-block use the first sign, the rest the second.
            for (int i1 = 0; i1 <= block_size; ++i1) {
                for (int i2 = i1; i2 <= block_size; ++i2) {
                    memset(sumqx, 0, 4*sizeof(float));
                    memset(sumq2, 0, 4*sizeof(float));
                    for (int j = 0; j < i1; ++j) {
                        int i = idx[2*j];
                        if (i < block_size/2) {
                            sumqx[0] += weight[i]*x_p[0]*xb[i];
                            sumqx[1] += weight[i]*x_p[0]*xb[i];
                            sumqx[2] += weight[i]*x_m[0]*xb[i];
                            sumqx[3] += weight[i]*x_m[0]*xb[i];
                            sumq2[0] += weight[i]*x_p[0]*x_p[0];
                            sumq2[1] += weight[i]*x_p[0]*x_p[0];
                            sumq2[2] += weight[i]*x_m[0]*x_m[0];
                            sumq2[3] += weight[i]*x_m[0]*x_m[0];
                        } else {
                            sumqx[0] += weight[i]*x_p[0]*xb[i];
                            sumqx[2] += weight[i]*x_p[0]*xb[i];
                            sumqx[1] += weight[i]*x_m[0]*xb[i];
                            sumqx[3] += weight[i]*x_m[0]*xb[i];
                            sumq2[0] += weight[i]*x_p[0]*x_p[0];
                            sumq2[2] += weight[i]*x_p[0]*x_p[0];
                            sumq2[1] += weight[i]*x_m[0]*x_m[0];
                            sumq2[3] += weight[i]*x_m[0]*x_m[0];
                        }
                    }
                    for (int j = i1; j < i2; ++j) {
                        int i = idx[2*j];
                        if (i < block_size/2) {
                            sumqx[0] += weight[i]*x_p[1]*xb[i];
                            sumqx[1] += weight[i]*x_p[1]*xb[i];
                            sumqx[2] += weight[i]*x_m[1]*xb[i];
                            sumqx[3] += weight[i]*x_m[1]*xb[i];
                            sumq2[0] += weight[i]*x_p[1]*x_p[1];
                            sumq2[1] += weight[i]*x_p[1]*x_p[1];
                            sumq2[2] += weight[i]*x_m[1]*x_m[1];
                            sumq2[3] += weight[i]*x_m[1]*x_m[1];
                        } else {
                            sumqx[0] += weight[i]*x_p[1]*xb[i];
                            sumqx[2] += weight[i]*x_p[1]*xb[i];
                            sumqx[1] += weight[i]*x_m[1]*xb[i];
                            sumqx[3] += weight[i]*x_m[1]*xb[i];
                            sumq2[0] += weight[i]*x_p[1]*x_p[1];
                            sumq2[2] += weight[i]*x_p[1]*x_p[1];
                            sumq2[1] += weight[i]*x_m[1]*x_m[1];
                            sumq2[3] += weight[i]*x_m[1]*x_m[1];
                        }
                    }
                    for (int j = i2; j < block_size; ++j) {
                        int i = idx[2*j];
                        if (i < block_size/2) {
                            sumqx[0] += weight[i]*x_p[2]*xb[i];
                            sumqx[1] += weight[i]*x_p[2]*xb[i];
                            sumqx[2] += weight[i]*x_m[2]*xb[i];
                            sumqx[3] += weight[i]*x_m[2]*xb[i];
                            sumq2[0] += weight[i]*x_p[2]*x_p[2];
                            sumq2[1] += weight[i]*x_p[2]*x_p[2];
                            sumq2[2] += weight[i]*x_m[2]*x_m[2];
                            sumq2[3] += weight[i]*x_m[2]*x_m[2];
                        } else {
                            sumqx[0] += weight[i]*x_p[2]*xb[i];
                            sumqx[2] += weight[i]*x_p[2]*xb[i];
                            sumqx[1] += weight[i]*x_m[2]*xb[i];
                            sumqx[3] += weight[i]*x_m[2]*xb[i];
                            sumq2[0] += weight[i]*x_p[2]*x_p[2];
                            sumq2[2] += weight[i]*x_p[2]*x_p[2];
                            sumq2[1] += weight[i]*x_m[2]*x_m[2];
                            sumq2[3] += weight[i]*x_m[2]*x_m[2];
                        }
                    }
                    for (int k = 0; k < 4; ++k) {
                        if (sumq2[k] > 0 && sumqx[k]*sumqx[k] > best_score*sumq2[k]) {
                            scale = sumqx[k]/sumq2[k]; best_score = scale*sumqx[k];
                            besti1 = i1; besti2 = i2; best_k = k;
                        }
                    }
                }
            }
            GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_k >= 0);
            for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0;
            for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
            for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
            if (scale < 0) {
                // Scales are stored unsigned: mirror the levels and both delta signs instead.
                for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
                scale = -scale;
                best_k = best_k == 0 ? 3 : best_k == 1 ? 2 : best_k == 2 ? 1 : 0;
            }
            bool all_on_grid = true;
            for (int k = 0; k < block_size/8; ++k) {
                // Select the quant table matching this group's delta sign.
                if (k == 0) xx = best_k < 2 ? x_p : x_m;
                else xx = best_k%2 == 0 ? x_p : x_m;
                // Pack the 8 levels, 2 bits each, into the key used to look up the grid map.
                uint16_t u = 0;
                for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    all_on_grid = false;
                    // A negative map entry encodes the offset of this pattern's neighbour list.
                    const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                    grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
                    GGML_ASSERT(grid_index >= 0);
                }
                index[k] = grid_index;
            }
            if (!all_on_grid) {
                // At least one group was moved to a neighbour: refit the scale to the final grid points.
                float sumqx_f = 0, sumq2_f = 0;
                for (int k = 0; k < block_size/8; ++k) {
                    if (k == 0) xx = best_k < 2 ? x_p : x_m;
                    else xx = best_k%2 == 0 ? x_p : x_m;
                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
                    for (int j = 0; j < 8; ++j) {
                        float w = weight[8*k + j];
                        float q = xx[(pg[j] - 1)/2];
                        sumqx_f += w*q*xb[8*k+j];
                        sumq2_f += w*q*q;
                    }
                }
                if (sumqx_f > 0 && sumq2_f > 0) scale = sumqx_f/sumq2_f;
            }
            // Low 8 bits of each grid index go to qs, the bits above share one qh byte (one nibble per group).
            y[ibl].qs[2*ib + 0] = index[0] & 255;
            y[ibl].qs[2*ib + 1] = index[1] & 255;
            y[ibl].qh[ib] = (index[0] >> 8) | ((index[1] >> 8) << 4);
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            shifts[ib] = best_k;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            continue;
        }

        uint16_t * sc = (uint16_t *)y[ibl].scales;
        float d = max_scale/15;
        float id = 1/d;
        float sumqx_f = 0, sumq2_f = 0;
        for (int ib = 0; ib < QK_K/block_size; ++ib) {
            // Quantize the sub-block scale to 3 bits; four of them share one 16-bit word.
            int l = nearest_int(0.5f*(id*scales[ib+0]-1));
            l = MAX(0, MIN(7, l));
            sc[ib/4] |= (l << 3*(ib%4));
            y[ibl].qh[ib] |= masks[shifts[ib]];
            // Recompute the weights and accumulate the sums for refitting the super-block
            // scale d against the quantized scales and the stored grid indices.
            const float * xb = xbl + block_size*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + block_size*ib;
                for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < block_size; ++i) weight[i] = xb[i]*xb[i];
            }
            for (int k = 0; k < block_size/8; ++k) {
                if (k == 0) xx = shifts[ib] < 2 ? x_p : x_m;
                else xx = shifts[ib]%2 == 0 ? x_p : x_m;
                // Rebuild the grid index from qs (low 8 bits) and the 3 bits kept in this group's qh nibble.
                const int8_t * pg = (const int8_t *)(kgrid_q2xs + y[ibl].qs[2*ib+k] + ((y[ibl].qh[ib] << (8 - 4*k)) & 0x700));
                for (int j = 0; j < 8; ++j) {
                    float w = weight[8*k + j];
                    float q = xx[(pg[j] - 1)/2]*(2*l+1);
                    sumqx_f += w*q*xb[8*k+j];
                    sumq2_f += w*q*q;
                }
            }
        }
        if (sumq2_f > 0) d = sumqx_f/sumq2_f;
        s.f16 = GGML_FP32_TO_FP16(d*1.1125f); // 1.1125f is another fudge factor. Don't ask me why it is needed.
        // The fp16 super-block scale is scattered, one nibble each, into the top 4 bits of the four scale words.
        sc[0] |= ((s.u16 & 0x000f) << 12);
        sc[1] |= ((s.u16 & 0x00f0) <<  8);
        sc[2] |= ((s.u16 & 0x0f00) <<  4);
        sc[3] |= ((s.u16 & 0xf000) <<  0);
    }
}
4618
4619
0
// Quantizes `nrow` consecutive rows of `n_per_row` floats from `src` into
// block_iq1_m data written to `dst`, one row at a time.
//
// n_per_row must be a multiple of QK_K (asserted). The same `quant_weights`
// pointer is handed to the row quantizer for every row. Returns the number of
// bytes written, nrow * (n_per_row/QK_K) * sizeof(block_iq1_m).
size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);

    // Scratch storage for the row quantizer; allocated once and reused for every row.
    float    scales[QK_K/IQ1M_BLOCK_SIZE];
    int8_t   shifts[QK_K/IQ1M_BLOCK_SIZE];
    float    weight[IQ1M_BLOCK_SIZE];
    float    pairs[2*IQ1M_BLOCK_SIZE];
    int8_t   L[IQ1M_BLOCK_SIZE];
    uint16_t index[IQ1M_BLOCK_SIZE/8];

    const int64_t nblock = n_per_row/QK_K;   // super-blocks per row
    char * out = (char *)dst;

    for (int64_t remaining = nrow; remaining > 0; --remaining) {
        quantize_row_iq1_m_impl(src, out, n_per_row, quant_weights, scales, weight, pairs, L, index, shifts);
        src += n_per_row;
        out += nblock*sizeof(block_iq1_m);
    }

    return nrow * nblock * sizeof(block_iq1_m);
}
4636
4637
// ============================ 4-bit non-linear quants
4638
4639
// Quantize one super-block of `super_block_size` floats from `x` to 4-bit indices into the
// codebook `values` (looked up as a 16-entry table), working in sub-blocks of `block_size` floats.
//
//   dh             receives the fp16 scale of the super-block
//   q4             receives super_block_size/2 bytes of packed 4-bit indices
//   scales_h       receives the upper bits of the per-sub-block scales; only touched when the
//                  super-block holds more than one sub-block
//   scales_l       receives the lower 4 bits of the per-sub-block scales; same condition
//   scales         scratch, one float per sub-block
//   weight         scratch, block_size floats
//   L              scratch, super_block_size bytes (one index per input value)
//   quant_weights  optional importance weights for this super-block, may be NULL
//   ntry           half-width of the scale search; the search loop does not run when ntry <= 0
static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * GGML_RESTRICT x,
        ggml_fp16_t * dh, uint8_t * q4, uint16_t * scales_h, uint8_t * scales_l,
        float * scales, float * weight, uint8_t * L,
        const int8_t * values,
        const float * quant_weights,
        const int ntry) {

    // twice the mean square of the super-block, used to regularize the importance weights
    float sigma2 = 0;
    for (int j = 0; j < super_block_size; ++j) sigma2 += x[j]*x[j];
    sigma2 *= 2.f/super_block_size;

    memset(q4, 0, super_block_size/2);
    dh[0] = GGML_FP32_TO_FP16(0.f);

    // signed sub-block scale with the largest magnitude, and that magnitude
    float max_scale = 0, amax_scale = 0;
    for (int ib = 0; ib < super_block_size/block_size; ++ib) {
        const float * xb = x + ib*block_size;
        uint8_t * Lb = L + ib*block_size;
        if (quant_weights) {
            const float * qw = quant_weights + ib*block_size;
            for (int j = 0; j < block_size; ++j) weight[j] = qw[j] * sqrtf(sigma2 + xb[j]*xb[j]);
        } else {
            for (int j = 0; j < block_size; ++j) weight[j] = xb[j]*xb[j];
        }
        // max keeps the sign of the element with the largest magnitude
        float amax = 0, max = 0;
        for (int j = 0; j < block_size; ++j) {
            float ax = fabsf(xb[j]);
            if (ax > amax) {
                amax = ax; max = xb[j];
            }
        }
        if (amax < GROUP_MAX_EPS) {
            // Give the indices of a skipped sub-block a defined value. Not every path below
            // recomputes L, and the final packing loop reads all of it.
            memset(Lb, 0, block_size);
            scales[ib] = 0;
            continue;
        }
        float d = ntry > 0 ? -max/values[0] : max/values[0];
        float id = 1/d;
        float sumqx = 0, sumq2 = 0;
        for (int j = 0; j < block_size; ++j) {
            float al = id*xb[j];
            int l = best_index_int8(16, values, al);
            Lb[j] = l;
            float q = values[l];
            float w = weight[j];
            sumqx += w*q*xb[j];
            sumq2 += w*q*q;
        }
        // weighted least-squares scale for the chosen indices; keep the initial scale when the
        // weights sum to nothing, instead of producing 0/0
        if (sumq2 > 0) d = sumqx/sumq2;
        float best = d*sumqx;
        // try neighbouring scales and keep the one with the best weighted fit
        for (int itry = -ntry; itry <= ntry; ++itry) {
            id = (itry + values[0])/max;
            sumqx = sumq2 = 0;
            for (int j = 0; j < block_size; ++j) {
                float al = id*xb[j];
                int l = best_index_int8(16, values, al);
                float q = values[l];
                float w = weight[j];
                sumqx += w*q*xb[j];
                sumq2 += w*q*q;
            }
            if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                d = sumqx/sumq2; best = d * sumqx;
            }
        }
        scales[ib] = d;
        float abs_d = fabsf(d);
        if (abs_d > amax_scale) {
            amax_scale = abs_d; max_scale = d;
        }
    }

    if (super_block_size/block_size > 1) {
        // several sub-blocks: quantize each sub-block scale to an integer in [-32, 31] relative
        // to the super-block scale, then redo the indices with the quantized scale
        int nb = super_block_size/block_size;
        memset(scales_h, 0, ((nb+7)/8)*sizeof(uint16_t));
        float d = -max_scale/32;
        dh[0] = GGML_FP32_TO_FP16(d);
        float id = d ? 1/d : 0.f;
        for (int ib = 0; ib < super_block_size/block_size; ++ib) {
            int l = nearest_int(id*scales[ib]);
            l = MAX(-32, MIN(31, l));
            float dl = d * l;
            float idl = dl ? 1/dl : 0.f;
            uint8_t * Lb = L + ib*block_size;
            const float * xb = x + ib*block_size;
            for (int j = 0; j < block_size; ++j) {
                Lb[j] = best_index_int8(16, values, idl*xb[j]);
            }
            // store l+32 split into a low nibble (scales_l) and the remaining high bits (scales_h)
            l += 32;
            uint8_t l_l = l & 0xf;
            uint8_t l_h = l >>  4;
            if (ib%2 == 0) scales_l[ib/2] = l_l;
            else scales_l[ib/2] |= (l_l << 4);
            scales_h[ib/8] |= (l_h << 2*(ib%8));
        }
    } else {
        // single sub-block: its scale is the block scale
        dh[0] = GGML_FP32_TO_FP16(scales[0]);
        if (ntry > 0) {
            float id = scales[0] ? 1/scales[0] : 0;
            for (int j = 0; j < super_block_size; ++j) {
                L[j] = best_index_int8(16, values, id*x[j]);
            }
        }
    }

    // pack two indices per byte: within each group of 32, entry j goes to the low nibble and
    // entry j+16 to the high nibble
    for (int i = 0; i < super_block_size/32; ++i) {
        for (int j = 0; j < 16; ++j) {
            q4[16*i + j] = L[32*i + j] | (L[32*i + 16 + j] << 4);
        }
    }
}
4749
4750
0
// Quantize `nrow` rows of `n_per_row` floats each from `src` into IQ4_NL blocks at `dst`.
// `n_per_row` must be a multiple of QK4_NL. When `quant_weights` is non-NULL the same
// per-position weights are applied to every row. Returns the number of bytes written.
size_t quantize_iq4_nl(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK4_NL == 0);

    const int64_t nblock = n_per_row/QK4_NL;

    // scratch for the block quantizer
    float   weight[QK4_NL];
    uint8_t L[QK4_NL];
    float   scale;

    // the sub-block scale outputs are passed only to satisfy the signature
    uint16_t  unused_h;
    uint8_t * unused_l = NULL;

    char * qrow = (char *)dst;
    for (int64_t row = 0; row < nrow; ++row) {
        block_iq4_nl * blocks = (block_iq4_nl *)qrow;
        for (int ibl = 0; ibl < nblock; ++ibl) {
            block_iq4_nl * out = &blocks[ibl];
            const float  * xb  = src + QK4_NL*ibl;
            const float  * qw  = NULL;
            if (quant_weights) {
                qw = quant_weights + QK4_NL*ibl;
            }
            quantize_row_iq4_nl_impl(QK4_NL, 32, xb, &out->d, out->qs, &unused_h, unused_l,
                    &scale, weight, L, kvalues_iq4nl, qw, 7);
        }
        qrow += nblock*sizeof(block_iq4_nl);
        src  += n_per_row;
    }

    return nrow * nblock * sizeof(block_iq4_nl);
}
4771
4772
//void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
4773
0
void quantize_row_iq4_nl_ref(const float * GGML_RESTRICT x, block_iq4_nl * GGML_RESTRICT y, int64_t k) {
4774
0
    GGML_ASSERT(k%QK4_NL == 0);
4775
0
    int64_t nblock = k/QK4_NL;
4776
0
    uint8_t L[QK4_NL];
4777
0
    float weight[QK4_NL];
4778
0
    uint16_t unused_h;
4779
0
    uint8_t * unused_l = NULL;
4780
0
    float scale;
4781
0
    block_iq4_nl * iq4 = y;
4782
0
    for (int ibl = 0; ibl < nblock; ++ibl) {
4783
0
        quantize_row_iq4_nl_impl(QK4_NL, 32, x + QK4_NL*ibl, &iq4[ibl].d, iq4[ibl].qs, &unused_h, unused_l,
4784
0
                &scale, weight, L, kvalues_iq4nl, NULL, -1);
4785
0
    }
4786
0
}
4787
4788
0
// Quantize `nrow` rows of `n_per_row` floats each from `src` into IQ4_XS super-blocks at `dst`.
// `n_per_row` must be a multiple of QK_K. When `quant_weights` is non-NULL the same
// per-position weights are applied to every row. Returns the number of bytes written.
size_t quantize_iq4_xs(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);

    const int64_t nblock = n_per_row/QK_K;

    // scratch for the super-block quantizer (sub-blocks of 32 values)
    float   scales[QK_K/32];
    float   weight[32];
    uint8_t L[QK_K];

    char * qrow = (char *)dst;
    for (int64_t row = 0; row < nrow; ++row) {
        block_iq4_xs * blocks = (block_iq4_xs *)qrow;
        for (int ibl = 0; ibl < nblock; ++ibl) {
            block_iq4_xs * out = &blocks[ibl];
            const float  * xb  = src + QK_K*ibl;
            const float  * qw  = NULL;
            if (quant_weights) {
                qw = quant_weights + QK_K*ibl;
            }
            quantize_row_iq4_nl_impl(QK_K, 32, xb, &out->d, out->qs, &out->scales_h, out->scales_l,
                    scales, weight, L, kvalues_iq4nl, qw, 7);
        }
        qrow += nblock*sizeof(block_iq4_xs);
        src  += n_per_row;
    }

    return nrow * nblock * sizeof(block_iq4_xs);
}
4807
4808
0
// Reference quantization of `k` floats from `x` into IQ4_XS super-blocks at `y`:
// a single-row call to quantize_iq4_xs() without importance weights.
void quantize_row_iq4_xs_ref(const float * GGML_RESTRICT x, block_iq4_xs * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);

    const int64_t nrow = 1;
    (void) quantize_iq4_xs(x, y, nrow, k, NULL);
}
4812
4813
// =============================== 2.5625 bpw
4814
4815
0
// Quantize `n` floats from `x` into IQ2_S super-blocks at `vy`.
//
//   x              input values; n must be a multiple of QK_K
//   vy             output, treated as an array of n/QK_K block_iq2_s
//   n              number of input values
//   quant_weights  optional importance weights, indexed like x; may be NULL
//
// Each super-block is processed in groups of 16 values. A group is split into two halves of 8;
// every half stores a sign byte and the index of a point of the IQ2_S grid. Requires the grid
// tables to have been set up beforehand (see the assertions below).
static void quantize_row_iq2_s_impl(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t n, const float * GGML_RESTRICT quant_weights) {

    const int gindex = iq2_data_index(GGML_TYPE_IQ2_S);

    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
    const int      * kmap_q2xs       = iq2_data[gindex].map;
    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;

    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
    GGML_ASSERT(n%QK_K == 0);

    // quantized magnitudes are 2*l+1 with l in [0, kMaxQ-1]
    const int kMaxQ = 3;

    const int64_t nbl = n/QK_K;

    block_iq2_s * y = vy;

    float scales[QK_K/16];
    float weight[16];
    float xval[16];
    int8_t L[16];
    int8_t Laux[16];
    float  waux[16];
    bool   is_on_grid[2];
    bool   is_on_grid_aux[2];
    uint8_t block_signs[2];

    for (int ibl = 0; ibl < nbl; ++ibl) {

        memset(&y[ibl], 0, sizeof(block_iq2_s));
        y[ibl].d = GGML_FP32_TO_FP16(0.f);

        float max_scale = 0;

        const float * xbl = x + QK_K*ibl;
        float sumx2 = 0;
        for (int i = 0; i < QK_K; ++i) sumx2 += xbl[i]*xbl[i];
        // twice the mean square of the super-block, used to regularize the weights
        float sigma2 = 2*sumx2/QK_K;

        for (int ib = 0; ib < QK_K/16; ++ib) {
            const float * xb = xbl + 16*ib;
            if (quant_weights) {
                const float * qw = quant_weights + QK_K*ibl + 16*ib;
                for (int i = 0; i < 16; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
            } else {
                for (int i = 0; i < 16; ++i) weight[i] = 0.25f*sigma2 + xb[i]*xb[i];
            }
            for (int i = 0; i < 16; ++i) waux[i] = sqrtf(weight[i]);
            // split into magnitudes (xval) and one sign bit per value (bit i set = negative)
            for (int k = 0; k < 2; ++k) {
                uint8_t s = 0;
                for (int i = 0; i < 8; ++i) {
                    if (xb[8*k + i] >= 0) xval[8*k + i] = xb[8*k + i];
                    else {
                        xval[8*k + i] = -xb[8*k + i]; s |= (1 << i);
                    }
                }
                block_signs[k] = s;
            }
            float max = xval[0];
            for (int i = 1; i < 16; ++i) max = MAX(max, xval[i]);
            if (max < GROUP_MAX_EPS_IQ2_S) {
                scales[ib] = 0;
                continue;
            }
            // L is only written when a candidate scale below is accepted. Start every group from
            // a defined state so the grid lookup further down never reads indeterminate bytes
            // when no candidate qualifies.
            memset(L, 0, sizeof(L));
            float best = 0;
            float scale = max/(2*kMaxQ-1);
            is_on_grid[0] = is_on_grid[1] = true;
            // scan candidate inverse scales around (2*kMaxQ-1)/max and keep the best weighted fit
            for (int is = -9; is <= 9; ++is) {
                float id = (2*kMaxQ-1+is*0.1f)/max;
                float this_scale = 1/id;
                for (int k = 0; k < 2; ++k) {
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        Laux[8*k+i] = MAX(0, MIN(kMaxQ-1, l));
                    }
                    // pack the 8 two-bit levels into the key used by the grid map
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) u |= (Laux[8*k+i] << 2*i);
                    int grid_index = kmap_q2xs[u];
                    is_on_grid_aux[k] = true;
                    if (grid_index < 0) {
                        // not a grid point: a negative map entry locates its neighbour list
                        is_on_grid_aux[k] = false;
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, this_scale, Laux + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*Laux[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0 && sumqx*sumqx > best*sumq2) {
                    scale = sumqx/sumq2; best = scale*sumqx;
                    for (int i = 0; i < 16; ++i) L[i] = Laux[i];
                    for (int k = 0; k <  2; ++k) is_on_grid[k] = is_on_grid_aux[k];
                }
            }
            int n_not_ongrid = 0;
            for (int k = 0; k < 2; ++k) if (!is_on_grid[k]) ++n_not_ongrid;
            if (n_not_ongrid > 0 && scale > 0) {
                // redo the halves that needed a neighbour search with the final scale, then refit
                float id = 1/scale;
                for (int k = 0; k < 2; ++k) {
                    if (is_on_grid[k]) continue;
                    uint16_t u = 0;
                    for (int i = 0; i < 8; ++i) {
                        int l = nearest_int(0.5f*(id*xval[8*k+i]-1));
                        l = MAX(0, MIN(kMaxQ-1, l));
                        u |= (l << 2*i);
                        L[8*k + i] = l;
                    }
                    int grid_index = kmap_q2xs[u];
                    if (grid_index < 0) {
                        const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
                        grid_index = iq2_find_best_neighbour(neighbours, kgrid_q2xs, xval + 8*k, waux + 8*k, scale, L + 8*k);
                    }
                }
                float sumqx = 0, sumq2 = 0;
                for (int i = 0; i < 16; ++i) {
                    float w = weight[i];
                    float q = 2*L[i] + 1;
                    sumqx += w*xval[i]*q;
                    sumq2 += w*q*q;
                }
                if (sumq2 > 0) scale = sumqx/sumq2;
            }
            if (scale < 0) {
                // keep the stored scale non-negative by flipping every sign instead
                scale = -scale;
                for (int k = 0; k < 2; ++k) block_signs[k] = ~block_signs[k];
            }
            for (int k = 0; k < 2; ++k) {
                uint16_t u = 0;
                for (int i = 0; i < 8; ++i) u |= (L[8*k+i] << 2*i);
                int grid_index = kmap_q2xs[u];
                if (grid_index < 0) {
                    printf("Oops: found point %u not on grid:", u);
                    for (int i = 0; i < 8; ++i) printf(" %d", L[8*k+i]);
                    printf("\n");
                    GGML_ABORT("fatal error");
                }
                // low 8 bits of the grid index go to qs, the bits above to a 2-bit field in qh;
                // the sign bytes are stored in the second part of qs
                const int i8 = 2*ib + k;
                y[ibl].qs[i8] = grid_index & 255;
                y[ibl].qh[i8/4] |= ((grid_index >> 8) << 2*(i8%4));
                y[ibl].qs[QK_K/8 + i8] = block_signs[k];
            }
            GGML_ASSERT(scale >= 0);
            scales[ib] = scale;
            max_scale = MAX(max_scale, scale);
        }

        if (!max_scale) {
            continue;
        }

        // quantize the group scales to 4 bits each (two per byte) relative to the block scale
        float d = max_scale/31;
        y[ibl].d = GGML_FP32_TO_FP16(d * 0.9875f);
        float id = 1/d;
        for (int ib = 0; ib < QK_K/16; ++ib) {
            int l = nearest_int(0.5f*(id*scales[ib]-1));
            l = MAX(0, MIN(15, l));
            if (ib%2 == 0) y[ibl].scales[ib/2] = l;
            else y[ibl].scales[ib/2] |= (l << 4);
        }
    }
}
4982
4983
0
// Quantize `nrow` rows of `n_per_row` floats each from `src` into IQ2_S super-blocks at `dst`.
// `n_per_row` must be a multiple of QK_K. `quant_weights` is forwarded unchanged for every row.
// Returns the number of bytes written to `dst`.
size_t quantize_iq2_s(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst, int64_t nrow, int64_t n_per_row, const float * quant_weights) {
    GGML_ASSERT(n_per_row%QK_K == 0);

    const int64_t nblock = n_per_row/QK_K;

    char * out = (char *)dst;
    for (int64_t remaining = nrow; remaining > 0; --remaining) {
        quantize_row_iq2_s_impl(src, out, n_per_row, quant_weights);
        out += nblock*sizeof(block_iq2_s);
        src += n_per_row;
    }

    return nrow * nblock * sizeof(block_iq2_s);
}
4994
4995
0
// Reference quantization of `k` floats from `x` into IQ2_S super-blocks at `y`:
// a single-row call to quantize_iq2_s() without importance weights.
void quantize_row_iq2_s_ref(const float * GGML_RESTRICT x, block_iq2_s * GGML_RESTRICT y, int64_t k) {
    assert(k % QK_K == 0);

    const int64_t nrow = 1;
    (void) quantize_iq2_s(x, y, nrow, k, NULL);
}
4999
5000
// =============================== data validation
5001
5002
0
// Returns true when `f` is finite. Otherwise reports to stderr whether it is an
// infinity or a NaN, tagged with the index `i`, and returns false.
static bool validate_float(float f, size_t i) {
    const bool is_inf = isinf(f) != 0;
    const bool is_nan = isnan(f) != 0;

    if (!is_inf && !is_nan) {
        return true;
    }

    if (is_inf) {
        fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
    } else {
        fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
    }
    return false;
}
5015
5016
0
// True when the fp16 bit pattern `f` has all exponent bits (0x7c00) set and no mantissa
// bits (0x03ff) set; the sign bit is ignored.
static bool isinf_fp16(ggml_fp16_t f) {
    const unsigned magnitude = f & 0x7fff; // everything except the sign bit
    return magnitude == 0x7c00;
}
5019
5020
0
// True when the fp16 bit pattern `f` has all exponent bits (0x7c00) set and at least one
// mantissa bit (0x03ff) set; the sign bit is ignored.
static bool isnan_fp16(ggml_fp16_t f) {
    const unsigned magnitude = f & 0x7fff; // everything except the sign bit
    return magnitude > 0x7c00;
}
5023
5024
0
// Returns true when the fp16 value `f` is neither an infinity nor a NaN. Otherwise
// reports which one it is to stderr, tagged with the index `i`, and returns false.
static bool validate_fp16(ggml_fp16_t f, size_t i) {
    const bool is_inf = isinf_fp16(f);
    const bool is_nan = isnan_fp16(f);

    if (!is_inf && !is_nan) {
        return true;
    }

    if (is_inf) {
        fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
    } else {
        fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
    }
    return false;
}
5037
5038
0
// Returns true unless the exponent byte `e` is 0xff, which is rejected; a rejection
// is reported to stderr together with the index `i`.
static bool validate_e_e8m0(uint8_t e, size_t i) {
    const bool ok = (e != 0xff);
    if (!ok) {
        fprintf(stderr, "ggml_validate_row_data: found invalid e value %d at block %zu\n", e, i);
    }
    return ok;
}
5046
5047
// Expands to statements for use inside a function returning bool: views `data` as `nb`
// blocks of `type` (declaring a local `q`) and returns false from the enclosing function
// at the first block whose fp16 member `d` fails validate_fp16().
#define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (validate_fp16(q[i].d, i)) { \
            continue; \
        } \
        return false; \
    }
5054
5055
// Expands to statements for use inside a function returning bool: views `data` as `nb`
// blocks of `type` (declaring a local `q`) and returns false from the enclosing function
// at the first block where either of the two fp16 members named by the `d` and `m`
// arguments fails validate_fp16(). The second member is only checked when the first passes.
#define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (validate_fp16(q[i].d, i) && validate_fp16(q[i].m, i)) { \
            continue; \
        } \
        return false; \
    }
5062
5063
// Expands to statements for use inside a function returning bool: views `data` as `nb`
// blocks of `type` (declaring a local `q`) and returns false from the enclosing function
// at the first block whose member `e` fails validate_e_e8m0().
#define VALIDATE_ROW_DATA_E_E8M0_IMPL(type, data, nb) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        if (validate_e_e8m0(q[i].e, i)) { \
            continue; \
        } \
        return false; \
    }
5070
5071
// Expands to statements for use inside a function returning bool: views `data` as `nb`
// blocks of `type` (declaring a local `q`) whose member `d` is an array of at least `nr`
// fp16 values, and returns false from the enclosing function at the first element that
// fails validate_fp16().
#define VALIDATE_ROW_DATA_DVEC_F16_IMPL(type, data, nb, nr) \
    const type * q = (const type *) (data); \
    for (size_t i = 0; i < (nb); ++i) { \
        for (size_t j = 0; j < (nr); ++j) { \
            if (validate_fp16(q[i].d[j], i)) { \
                continue; \
            } \
            return false; \
        } \
    }
5080
5081
0
bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
5082
0
    if (type < 0 || type >= GGML_TYPE_COUNT) {
5083
0
        fprintf(stderr, "%s: invalid type %d\n", __func__, type);
5084
0
        return false;
5085
0
    }
5086
5087
0
    if (nbytes % ggml_type_size(type) != 0) {
5088
0
        fprintf(stderr, "%s: invalid size %zu for type %s (type size = %zu)\n", __func__, nbytes, ggml_type_name(type), ggml_type_size(type));
5089
0
        return false;
5090
0
    }
5091
5092
0
    const size_t nb = nbytes/ggml_type_size(type);
5093
5094
0
    switch (type) {
5095
0
        case GGML_TYPE_BF16:
5096
0
            {
5097
0
                int nans = 0;
5098
0
                int infs = 0;
5099
0
                const unsigned short * f = (const unsigned short *) data;
5100
0
                for (size_t i = 0; i < nb; ++i) {
5101
0
                    nans += (f[i] & 0x7fff) > 0x7f80;
5102
0
                    infs += (f[i] & 0x7fff) == 0x7f80;
5103
0
                }
5104
0
                if (nans) {
5105
0
                    fprintf(stderr, "%s: found %d NaNs in row of %zu BF16 values\n", __func__, nans, nb);
5106
0
                    return false;
5107
0
                }
5108
0
                if (infs) {
5109
0
                    fprintf(stderr, "%s: found %d infinities in row of %zu BF16 values\n", __func__, infs, nb);
5110
0
                    return false;
5111
0
                }
5112
0
            } break;
5113
0
        case GGML_TYPE_F16:
5114
0
            {
5115
0
                const ggml_fp16_t * f = (const ggml_fp16_t *) data;
5116
0
                size_t i = 0;
5117
#if defined(__AVX2__)
5118
                for (; i + 15 < nb; i += 16) {
5119
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
5120
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00));
5121
                    __m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00));
5122
                    int mask = _mm256_movemask_epi8(cmp);
5123
                    if (mask) {
5124
                        for (size_t j = 0; j < 16; ++j) {
5125
                            if (!validate_fp16(f[i + j], i + j)) {
5126
                                return false;
5127
                            }
5128
                        }
5129
                        GGML_UNREACHABLE();
5130
                    }
5131
                }
5132
#elif defined(__ARM_NEON)
5133
                for (; i + 7 < nb; i += 8) {
5134
                    uint16x8_t v = vld1q_u16(f + i);
5135
                    uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
5136
                    uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
5137
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
5138
                    if (mask) {
5139
                        for (size_t j = 0; j < 8; ++j) {
5140
                            if (!validate_fp16(f[i + j], i + j)) {
5141
                                return false;
5142
                            }
5143
                        }
5144
                        GGML_UNREACHABLE();
5145
                    }
5146
                }
5147
#endif
5148
0
                for (; i < nb; ++i) {
5149
0
                    if (!validate_fp16(f[i], i)) {
5150
0
                        return false;
5151
0
                    }
5152
0
                }
5153
0
            } break;
5154
0
        case GGML_TYPE_F32:
5155
0
            {
5156
0
                const float * f = (const float *) data;
5157
0
                size_t i = 0;
5158
#if defined(__AVX2__)
5159
                for (; i + 7 < nb; i += 8) {
5160
                    __m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
5161
                    __m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
5162
                    __m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
5163
                    int mask = _mm256_movemask_epi8(cmp);
5164
                    if (mask) {
5165
                        for (size_t j = 0; j < 8; ++j) {
5166
                            if (!validate_float(f[i + j], i + j)) {
5167
                                return false;
5168
                            }
5169
                        }
5170
                        GGML_UNREACHABLE();
5171
                    }
5172
                }
5173
#elif defined(__ARM_NEON)
5174
                for (; i + 3 < nb; i += 4) {
5175
                    uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
5176
                    uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
5177
                    uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
5178
                    uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
5179
                    if (mask) {
5180
                        for (size_t j = 0; j < 4; ++j) {
5181
                            if (!validate_float(f[i + j], i + j)) {
5182
                                return false;
5183
                            }
5184
                        }
5185
                        GGML_UNREACHABLE();
5186
                    }
5187
                }
5188
#endif
5189
0
                for (; i < nb; ++i) {
5190
0
                    if (!validate_float(f[i], i)) {
5191
0
                        return false;
5192
0
                    }
5193
0
                }
5194
0
            } break;
5195
0
        case GGML_TYPE_F64:
5196
0
            {
5197
0
                const double * f = (const double *) data;
5198
0
                for (size_t i = 0; i < nb; ++i) {
5199
0
                    if (!validate_float(f[i], i)) {
5200
0
                        return false;
5201
0
                    }
5202
0
                }
5203
0
            } break;
5204
0
        case GGML_TYPE_Q4_0:
5205
0
            {
5206
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
5207
0
            } break;
5208
0
        case GGML_TYPE_Q4_1:
5209
0
            {
5210
0
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
5211
0
            } break;
5212
0
        case GGML_TYPE_Q5_0:
5213
0
            {
5214
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
5215
0
            } break;
5216
0
        case GGML_TYPE_Q5_1:
5217
0
            {
5218
0
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
5219
0
            } break;
5220
0
        case GGML_TYPE_Q8_0:
5221
0
            {
5222
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
5223
0
            } break;
5224
0
        case GGML_TYPE_MXFP4:
5225
0
            {
5226
0
                VALIDATE_ROW_DATA_E_E8M0_IMPL(block_mxfp4, data, nb);
5227
0
            } break;
5228
0
        case GGML_TYPE_Q2_K:
5229
0
            {
5230
0
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
5231
0
            } break;
5232
0
        case GGML_TYPE_Q3_K:
5233
0
            {
5234
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
5235
0
            } break;
5236
0
        case GGML_TYPE_Q4_K:
5237
0
            {
5238
0
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
5239
0
            } break;
5240
0
        case GGML_TYPE_Q5_K:
5241
0
            {
5242
0
                VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
5243
0
            } break;
5244
0
        case GGML_TYPE_Q6_K:
5245
0
            {
5246
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
5247
0
            } break;
5248
0
        case GGML_TYPE_Q8_K:
5249
0
            {
5250
0
                const block_q8_K * q = (const block_q8_K *) data;
5251
0
                for (size_t i = 0; i < nb; ++i) {
5252
0
                    if (!validate_float(q[i].d, i)) {
5253
0
                        return false;
5254
0
                    }
5255
0
                }
5256
0
            } break;
5257
0
        case GGML_TYPE_TQ1_0:
5258
0
            {
5259
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_tq1_0, data, nb);
5260
0
            } break;
5261
0
        case GGML_TYPE_TQ2_0:
5262
0
            {
5263
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_tq2_0, data, nb);
5264
0
            } break;
5265
0
        case GGML_TYPE_IQ1_S:
5266
0
            {
5267
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
5268
0
            } break;
5269
0
        case GGML_TYPE_IQ1_M:
5270
0
            {
5271
0
                const block_iq1_m * q = (const block_iq1_m *) data;
5272
0
                for (size_t i = 0; i < nb; ++i) {
5273
0
                    iq1m_scale_t scale;
5274
0
                    const uint16_t * sc = (const uint16_t *)q[i].scales;
5275
0
                    scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
5276
0
                    if (!validate_fp16(scale.f16, i)) {
5277
0
                        return false;
5278
0
                    }
5279
0
                }
5280
0
            } break;
5281
0
        case GGML_TYPE_IQ2_XXS:
5282
0
            {
5283
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
5284
0
            } break;
5285
0
        case GGML_TYPE_IQ2_XS:
5286
0
            {
5287
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
5288
0
            } break;
5289
0
        case GGML_TYPE_IQ2_S:
5290
0
            {
5291
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
5292
0
            } break;
5293
0
        case GGML_TYPE_IQ3_XXS:
5294
0
            {
5295
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
5296
0
            } break;
5297
5298
0
        case GGML_TYPE_IQ3_S:
5299
0
            {
5300
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
5301
0
            } break;
5302
0
        case GGML_TYPE_IQ4_XS:
5303
0
            {
5304
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
5305
0
            } break;
5306
0
        case GGML_TYPE_IQ4_NL:
5307
0
            {
5308
0
                VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
5309
0
            } break;
5310
5311
0
        case GGML_TYPE_I8:
5312
0
        case GGML_TYPE_I16:
5313
0
        case GGML_TYPE_I32:
5314
0
        case GGML_TYPE_I64:
5315
            // nothing to validate
5316
0
            break;
5317
0
        default:
5318
0
            {
5319
0
                fprintf(stderr, "%s: invalid type %d\n", __func__, type);
5320
0
                return false;
5321
0
            }
5322
0
    }
5323
5324
0
    return true;
5325
0
}