/src/llama.cpp/ggml/src/ggml-cpu/common.h

Source
#pragma once

#include "ggml.h"
#include "traits.h"
#include "ggml-cpu-impl.h"
#include "ggml-impl.h"
#include "simd-mappings.h"

// Tile extents, also exposed to C++ code as typed constants through
// ggml_fa_tile_config::Q and ggml_fa_tile_config::KV.
// NOTE(review): "FA" presumably stands for flash attention, with Q / KV being the
// query and key/value tile sizes — confirm against the kernels that consume them.
#define GGML_FA_TILE_Q  64
#define GGML_FA_TILE_KV 64

#ifdef __cplusplus

#include <utility>

// convenience functions/macros for use in template calls
// note: these won't be required after the 'traits' lookup table is used.
// Scalar float32 -> float16 conversion. Wraps the GGML_CPU_FP32_TO_FP16 macro in a
// real function so it can be named in template code and stored as a function pointer.
static inline ggml_fp16_t f32_to_f16(float x) {
    const ggml_fp16_t half_value = GGML_CPU_FP32_TO_FP16(x);
    return half_value;
}

// Scalar float16 -> float32 conversion. Wraps the GGML_CPU_FP16_TO_FP32 macro in a
// real function so it can be named in template code and stored as a function pointer.
static inline float f16_to_f32(ggml_fp16_t x) {
    const float single_value = GGML_CPU_FP16_TO_FP32(x);
    return single_value;
}

// Scalar float32 -> bfloat16 conversion. Wraps the GGML_FP32_TO_BF16 macro in a
// real function so it can be named in template code and stored as a function pointer.
static inline ggml_bf16_t f32_to_bf16(float x) {
    const ggml_bf16_t brain_value = GGML_FP32_TO_BF16(x);
    return brain_value;
}

// Scalar bfloat16 -> float32 conversion. Wraps the GGML_BF16_TO_FP32 macro in a
// real function so it can be named in template code and stored as a function pointer.
static inline float bf16_to_f32(ggml_bf16_t x) {
    const float single_value = GGML_BF16_TO_FP32(x);
    return single_value;
}

// Scalar int32 -> float32 conversion using the language's standard
// integer-to-floating conversion (the same one the implicit conversion performs).
static inline float i32_to_f32(int32_t x) {
    return static_cast<float>(x);
}

// Scalar float32 -> int32 conversion using the language's standard
// floating-to-integer conversion: the fractional part is discarded (truncation
// toward zero). No range check or NaN handling is performed here.
static inline int32_t f32_to_i32(float x) {
    return static_cast<int32_t>(x);
}

// Identity "conversion" for float32. Exists so float can be handled through the
// same to_f32 / from_f32 function-pointer slots as the other element types.
static inline float f32_to_f32(float x) {
    const float unchanged = x;
    return unchanged;
}

// TODO - merge this into the traits table, after using row-based conversions
//
// Maps an element type T to a pair of scalar conversion function pointers:
//   to_f32   : T     -> float
//   from_f32 : float -> T
// Only the primary template's declaration appears here; the members are supplied
// by the explicit specializations that follow, so instantiating the table for a
// type without a specialization fails to compile (incomplete type).
template <class T>
struct type_conversion_table;

template <>
struct type_conversion_table<ggml_fp16_t> {
    static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
    static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
};

// float32 elements: both directions are the identity function.
template <>
struct type_conversion_table<float> {
    static constexpr float (*to_f32)(float)   = &f32_to_f32;
    static constexpr float (*from_f32)(float) = &f32_to_f32;
};

template <>
struct type_conversion_table<ggml_bf16_t> {
    static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
    static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
};

// int32 elements: scalar conversion hooks to and from float32.
template <>
struct type_conversion_table<int32_t> {
    static constexpr float   (*to_f32)(int32_t) = &i32_to_f32;
    static constexpr int32_t (*from_f32)(float) = &f32_to_i32;
};

// Computes the row interval [first, second) of `src0` assigned to the calling thread.
//
// params - supplies this thread's index (ith) and the total thread count (nth);
//          nth is used as a divisor, so it must be non-zero
// src0   - tensor whose rows (as counted by ggml_nrows) are being partitioned
//
// Rows are handed out in contiguous chunks of ceil(nr / nth) rows: thread `ith`
// starts at ith * chunk and the last non-empty chunk is cut off at nr. Threads whose
// chunk would start at or beyond nr receive the empty range {nr, nr}, so the
// returned pair always satisfies first <= second.
//
// Declared `static inline`, like the other helpers in this header, so translation
// units that include the header without calling it do not get unused-function
// warnings.
static inline std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
    const int64_t ith = params->ith;
    const int64_t nth = params->nth;

    const int64_t nr  = ggml_nrows(src0);

    // rows per thread, rounded up so that every row is assigned to some thread
    const int64_t dr = (nr + nth - 1)/nth;

    // row range for this thread; the start is clamped as well as the end so that
    // surplus threads (dr*ith >= nr) get first == second instead of first > second
    const int64_t ir0 = MIN(dr*ith, nr);
    const int64_t ir1 = MIN(ir0 + dr, nr);

    return {ir0, ir1};
}

// Typed, compile-time constants mirroring the GGML_FA_TILE_Q / GGML_FA_TILE_KV
// macros, for use where a size_t constant is more convenient than a macro.
struct ggml_fa_tile_config {
    static constexpr size_t Q{GGML_FA_TILE_Q};
    static constexpr size_t KV{GGML_FA_TILE_KV};
};

#endif

Line	Count	Source
1		#pragma once
2
3		#include "ggml.h"
4		#include "traits.h"
5		#include "ggml-cpu-impl.h"
6		#include "ggml-impl.h"
7		#include "simd-mappings.h"
8
9	0	#define GGML_FA_TILE_Q 64
10	0	#define GGML_FA_TILE_KV 64
11
12		#ifdef __cplusplus
13
14		#include <utility>
15
16		// convenience functions/macros for use in template calls
17		// note: these won't be required after the 'traits' lookup table is used.
18	0	static inline ggml_fp16_t f32_to_f16(float x) {
19	0	return GGML_CPU_FP32_TO_FP16(x);
20	0	} Unexecuted instantiation: binary-ops.cpp:f32_to_f16(float) Unexecuted instantiation: unary-ops.cpp:f32_to_f16(float) Unexecuted instantiation: ops.cpp:f32_to_f16(float)
21
22	0	static inline float f16_to_f32(ggml_fp16_t x) {
23	0	return GGML_CPU_FP16_TO_FP32(x);
24	0	} Unexecuted instantiation: binary-ops.cpp:f16_to_f32(unsigned short) Unexecuted instantiation: unary-ops.cpp:f16_to_f32(unsigned short) Unexecuted instantiation: ops.cpp:f16_to_f32(unsigned short)
25
26	0	static inline ggml_bf16_t f32_to_bf16(float x) {
27	0	return GGML_FP32_TO_BF16(x);
28	0	} Unexecuted instantiation: binary-ops.cpp:f32_to_bf16(float) Unexecuted instantiation: unary-ops.cpp:f32_to_bf16(float) Unexecuted instantiation: ops.cpp:f32_to_bf16(float)
29
30	0	static inline float bf16_to_f32(ggml_bf16_t x) {
31	0	return GGML_BF16_TO_FP32(x);
32	0	} Unexecuted instantiation: binary-ops.cpp:bf16_to_f32(ggml_bf16_t) Unexecuted instantiation: unary-ops.cpp:bf16_to_f32(ggml_bf16_t) Unexecuted instantiation: ops.cpp:bf16_to_f32(ggml_bf16_t)
33
34	0	static inline float i32_to_f32(int32_t x) {
35	0	return x;
36	0	} Unexecuted instantiation: binary-ops.cpp:i32_to_f32(int) Unexecuted instantiation: unary-ops.cpp:i32_to_f32(int) Unexecuted instantiation: ops.cpp:i32_to_f32(int)
37
38	0	static inline int32_t f32_to_i32(float x) {
39	0	return x;
40	0	} Unexecuted instantiation: binary-ops.cpp:f32_to_i32(float) Unexecuted instantiation: unary-ops.cpp:f32_to_i32(float) Unexecuted instantiation: ops.cpp:f32_to_i32(float)
41
42	0	static inline float f32_to_f32(float x) {
43	0	return x;
44	0	} Unexecuted instantiation: binary-ops.cpp:f32_to_f32(float) Unexecuted instantiation: unary-ops.cpp:f32_to_f32(float) Unexecuted instantiation: ops.cpp:f32_to_f32(float)
45
46		// TODO - merge this into the traits table, after using row-based conversions
47		template <class T>
48		struct type_conversion_table;
49
50		template <>
51		struct type_conversion_table<ggml_fp16_t> {
52		static constexpr float (*to_f32)(ggml_fp16_t) = f16_to_f32;
53		static constexpr ggml_fp16_t (*from_f32)(float) = f32_to_f16;
54		};
55
56		template <>
57		struct type_conversion_table<float> {
58		static constexpr float (*to_f32)(float) = f32_to_f32;
59		static constexpr float (*from_f32)(float) = f32_to_f32;
60		};
61
62		template <>
63		struct type_conversion_table<ggml_bf16_t> {
64		static constexpr float (*to_f32)(ggml_bf16_t) = bf16_to_f32;
65		static constexpr ggml_bf16_t (*from_f32)(float) = f32_to_bf16;
66		};
67
68		template <>
69		struct type_conversion_table<int32_t> {
70		static constexpr float (*to_f32)(int32_t) = i32_to_f32;
71		static constexpr int32_t (*from_f32)(float) = f32_to_i32;
72		};
73
74	0	static std::pair<int64_t, int64_t> get_thread_range(const struct ggml_compute_params * params, const struct ggml_tensor * src0) {
75	0	const int64_t ith = params->ith;
76	0	const int64_t nth = params->nth;
77
78	0	const int64_t nr = ggml_nrows(src0);
79
80		// rows per thread
81	0	const int64_t dr = (nr + nth - 1)/nth;
82
83		// row range for this thread
84	0	const int64_t ir0 = dr*ith;
85	0	const int64_t ir1 = MIN(ir0 + dr, nr);
86
87	0	return {ir0, ir1};
88	0	} Unexecuted instantiation: binary-ops.cpp:get_thread_range(ggml_compute_params const, ggml_tensor const) Unexecuted instantiation: unary-ops.cpp:get_thread_range(ggml_compute_params const, ggml_tensor const) Unexecuted instantiation: ops.cpp:get_thread_range(ggml_compute_params const, ggml_tensor const)
89
90		struct ggml_fa_tile_config {
91		static constexpr size_t Q = GGML_FA_TILE_Q;
92		static constexpr size_t KV = GGML_FA_TILE_KV;
93		};
94
95		#endif

Coverage Report

Created: 2026-03-07 06:35