/src/llama.cpp/ggml/src/ggml-cpu/common.h
Line | Count | Source |
1 | | #pragma once |
2 | | |
3 | | #include "ggml.h" |
4 | | #include "traits.h" |
5 | | #include "ggml-cpu-impl.h" |
6 | | #include "ggml-impl.h" |
7 | | #include "simd-mappings.h" |
8 | | |
// Tile extents along the Q and KV dimensions; mirrored as typed constants by ggml_fa_tile_config.
// NOTE(review): "FA" presumably stands for flash attention - confirm against the users of these macros.
#define GGML_FA_TILE_Q  64
#define GGML_FA_TILE_KV 64
11 | | |
12 | | #ifdef __cplusplus |
13 | | |
14 | | #include <utility> |
15 | | |
16 | | // convenience functions/macros for use in template calls |
17 | | // note: these won't be required after the 'traits' lookup table is used. |
18 | 0 | static inline ggml_fp16_t f32_to_f16(float x) { |
19 | 0 | return GGML_CPU_FP32_TO_FP16(x); |
20 | 0 | } Unexecuted instantiation: binary-ops.cpp:f32_to_f16(float) Unexecuted instantiation: unary-ops.cpp:f32_to_f16(float) Unexecuted instantiation: ops.cpp:f32_to_f16(float) |
21 | | |
22 | 0 | static inline float f16_to_f32(ggml_fp16_t x) { |
23 | 0 | return GGML_CPU_FP16_TO_FP32(x); |
24 | 0 | } Unexecuted instantiation: binary-ops.cpp:f16_to_f32(unsigned short) Unexecuted instantiation: unary-ops.cpp:f16_to_f32(unsigned short) Unexecuted instantiation: ops.cpp:f16_to_f32(unsigned short) |
25 | | |
26 | 0 | static inline ggml_bf16_t f32_to_bf16(float x) { |
27 | 0 | return GGML_FP32_TO_BF16(x); |
28 | 0 | } Unexecuted instantiation: binary-ops.cpp:f32_to_bf16(float) Unexecuted instantiation: unary-ops.cpp:f32_to_bf16(float) Unexecuted instantiation: ops.cpp:f32_to_bf16(float) |
29 | | |
30 | 0 | static inline float bf16_to_f32(ggml_bf16_t x) { |
31 | 0 | return GGML_BF16_TO_FP32(x); |
32 | 0 | } Unexecuted instantiation: binary-ops.cpp:bf16_to_f32(ggml_bf16_t) Unexecuted instantiation: unary-ops.cpp:bf16_to_f32(ggml_bf16_t) Unexecuted instantiation: ops.cpp:bf16_to_f32(ggml_bf16_t) |
33 | | |
// Convert a 32-bit signed integer to float using the built-in integer -> floating-point conversion.
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}
37 | | |
// Convert a float to a 32-bit signed integer using the built-in floating-point -> integer conversion,
// i.e. the fractional part is discarded (truncation toward zero).
// No range or NaN check is performed here; the input must be representable as int32_t after truncation.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}
41 | | |
// Identity "conversion" for float, so float can be handled through the same
// to_f32/from_f32 function-pointer interface as the other element types.
static inline float f32_to_f32(float x) {
    const float unchanged = x;
    return unchanged;
}
45 | | |
46 | | // TODO - merge this into the traits table, after using row-based conversions |
// Maps an element type T to its scalar float converters (to_f32 / from_f32).
// Only declared here: the per-type specializations provide the members.
template <typename T>
struct type_conversion_table;
49 | | |
50 | | template <> |
51 | | struct type_conversion_table<ggml_fp16_t> { |
52 | | static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32; |
53 | | static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16; |
54 | | }; |
55 | | |
56 | | template <> |
57 | | struct type_conversion_table<float> { |
58 | | static constexpr float (*to_f32)(float) = f32_to_f32; |
59 | | static constexpr float (*from_f32)(float) = f32_to_f32; |
60 | | }; |
61 | | |
62 | | template <> |
63 | | struct type_conversion_table<ggml_bf16_t> { |
64 | | static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32; |
65 | | static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16; |
66 | | }; |
67 | | |
68 | | template <> |
69 | | struct type_conversion_table<int32_t> { |
70 | | static constexpr float (*to_f32)(int32_t) = i32_to_f32; |
71 | | static constexpr int32_t (*from_f32)(float) = f32_to_i32; |
72 | | }; |
73 | | |
74 | 0 | static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) { |
75 | 0 | const int64_t ith = params->ith; |
76 | 0 | const int64_t nth = params->nth; |
77 | |
|
78 | 0 | const int64_t nr = ggml_nrows(src0); |
79 | | |
80 | | // rows per thread |
81 | 0 | const int64_t dr = (nr + nth - 1)/nth; |
82 | | |
83 | | // row range for this thread |
84 | 0 | const int64_t ir0 = dr*ith; |
85 | 0 | const int64_t ir1 = MIN(ir0 + dr, nr); |
86 | |
|
87 | 0 | return {ir0, ir1}; |
88 | 0 | } Unexecuted instantiation: binary-ops.cpp:get_thread_range(ggml_compute_params const*, ggml_tensor const*) Unexecuted instantiation: unary-ops.cpp:get_thread_range(ggml_compute_params const*, ggml_tensor const*) Unexecuted instantiation: ops.cpp:get_thread_range(ggml_compute_params const*, ggml_tensor const*) |
89 | | |
90 | | struct ggml_fa_tile_config { |
91 | | static constexpr size_t Q = GGML_FA_TILE_Q; |
92 | | static constexpr size_t KV = GGML_FA_TILE_KV; |
93 | | }; |
94 | | |
95 | | #endif |