/src/xnnpack/src/configs/pack-lh-config.c
Line | Count | Source |
1 | | // Copyright 2024 Google LLC |
2 | | // |
3 | | // This source code is licensed under the BSD-style license found in the |
4 | | // LICENSE file in the root directory of this source tree. |
5 | | |
6 | | #include <assert.h> |
7 | | #include <stddef.h> |
8 | | |
9 | | #include "src/xnnpack/common.h" |
10 | | #include "src/xnnpack/config-types.h" |
11 | | #include "src/xnnpack/config.h" |
12 | | #include "src/xnnpack/hardware-config.h" |
13 | | #include "src/xnnpack/init-once.h" |
14 | | #include "src/xnnpack/microfnptr.h" |
15 | | #include "src/xnnpack/pack-lh.h" |
16 | | #include "src/xnnpack/packq.h" |
17 | | |
18 | | static struct xnn_pack_lh_config qp8_pack_lh_config = {0}; |
19 | | static struct xnn_pack_lh_config x8_pack_lh_config = {0}; |
20 | | static struct xnn_pack_lh_config x16_pack_lh_config = {0}; |
21 | | static struct xnn_pack_lh_config x32_pack_lh_config = {0}; |
22 | | static struct xnn_pack_lh_config x8_igemm_pack_lh_config = {0}; |
23 | | |
24 | | XNN_INIT_ONCE_GUARD(qp8_pack_lh); |
25 | | XNN_INIT_ONCE_GUARD(x8_pack_lh); |
26 | | XNN_INIT_ONCE_GUARD(x16_pack_lh); |
27 | | XNN_INIT_ONCE_GUARD(x32_pack_lh); |
28 | | XNN_INIT_ONCE_GUARD(x8_igemm_pack_lh); |
29 | | |
30 | 0 | static void init_qp8_pack_lh_config(void) { |
31 | | #if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
32 | | qp8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__aarch64_neon_u2; |
33 | | #else |
34 | 0 | qp8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn)xnn_x8_packq_f32qp8_ukernel__scalar_u1; |
35 | 0 | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
36 | 0 | qp8_pack_lh_config.size_fn = (xnn_pack_lh_size_fn)xnn_x8_packq_f32qp8_packed_size; |
37 | 0 | qp8_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn)xnn_x8_packq_f32qp8_packed_offset; |
38 | 0 | qp8_pack_lh_config.log2_input_element_size = XNN_LOG2_SIZEOF_FLOAT; |
39 | 0 | qp8_pack_lh_config.log2_packed_element_size = 0; |
40 | 0 | } |
41 | | |
42 | 0 | const struct xnn_pack_lh_config* xnn_init_qp8_pack_lh_config() { |
43 | 0 | const struct xnn_hardware_config* hardware_config = |
44 | 0 | xnn_init_hardware_config(); |
45 | 0 | if (hardware_config == NULL) { |
46 | 0 | return NULL; |
47 | 0 | } |
48 | 0 | XNN_INIT_ONCE(qp8_pack_lh); |
49 | 0 | return &qp8_pack_lh_config; |
50 | 0 | } |
51 | | |
52 | 0 | static void init_x32_pack_lh_config(void) { |
53 | | #if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
54 | | #if XNN_ENABLE_ARM_SME2 || XNN_ENABLE_ARM_SME |
55 | | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
56 | | assert(hardware_config != NULL); |
57 | | if (hardware_config->arch_flags & xnn_arch_arm_sme) { |
58 | | x32_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x32_pack_lh_ukernel__neonsme; |
59 | | x32_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x32_pack_lh_size__neonsme; |
60 | | x32_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x32_pack_lh_offset__neonsme; |
61 | | } |
62 | | #endif // XNN_ENABLE_ARM_SME2 || XNN_ENABLE_ARM_SME |
63 | | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
64 | 0 | x32_pack_lh_config.log2_input_element_size = 2; |
65 | 0 | x32_pack_lh_config.log2_packed_element_size = 2; |
66 | 0 | x32_pack_lh_config.gemv_noop = true; |
67 | 0 | } |
68 | | |
69 | 0 | const struct xnn_pack_lh_config* xnn_init_x32_pack_lh_config() { |
70 | 0 | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
71 | 0 | if (hardware_config == NULL) { |
72 | 0 | return NULL; |
73 | 0 | } |
74 | 0 | XNN_INIT_ONCE(x32_pack_lh); |
75 | 0 | return &x32_pack_lh_config; |
76 | 0 | } |
77 | | |
78 | 0 | static void init_x16_pack_lh_config(void) { |
79 | | #if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
80 | | #if XNN_ENABLE_ARM_SME2 |
81 | | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
82 | | assert(hardware_config != NULL); |
83 | | if (hardware_config->arch_flags & xnn_arch_arm_sme2) { |
84 | | x16_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x16_pack_lh_ukernel__neonsme2; |
85 | | x16_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x16_pack_lh_size__neonsme2; |
86 | | x16_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x16_pack_lh_offset__neonsme2; |
87 | | } |
88 | | #endif // XNN_ENABLE_ARM_SME2 |
89 | | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
90 | 0 | x16_pack_lh_config.log2_input_element_size = 1; |
91 | 0 | x16_pack_lh_config.log2_packed_element_size = 1; |
92 | 0 | x16_pack_lh_config.gemv_noop = true; |
93 | 0 | } |
94 | | |
95 | 0 | const struct xnn_pack_lh_config* xnn_init_x16_pack_lh_config() { |
96 | 0 | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
97 | 0 | if (hardware_config == NULL) { |
98 | 0 | return NULL; |
99 | 0 | } |
100 | 0 | XNN_INIT_ONCE(x16_pack_lh); |
101 | 0 | return &x16_pack_lh_config; |
102 | 0 | } |
103 | | |
104 | 0 | static void init_x8_pack_lh_config(void) { |
105 | | #if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
106 | | #if XNN_ENABLE_ARM_SME2 |
107 | | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
108 | | assert(hardware_config != NULL); |
109 | | if (hardware_config->arch_flags & xnn_arch_arm_sme2) { |
110 | | x8_pack_lh_config.pack_lh_fn = (xnn_pack_lh_ukernel_fn) xnn_x8_pack_lh_ukernel__neonsme2; |
111 | | x8_pack_lh_config.size_fn = (xnn_pack_lh_size_fn) xnn_x8_pack_lh_size__neonsme2; |
112 | | x8_pack_lh_config.offset_fn = (xnn_pack_lh_offset_fn) xnn_x8_pack_lh_offset__neonsme2; |
113 | | } |
114 | | #endif // XNN_ENABLE_ARM_SME2 |
115 | | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
116 | 0 | x8_pack_lh_config.log2_input_element_size = 0; |
117 | 0 | x8_pack_lh_config.log2_packed_element_size = 0; |
118 | 0 | x8_pack_lh_config.gemv_noop = true; |
119 | 0 | } |
120 | | |
121 | 0 | const struct xnn_pack_lh_config* xnn_init_x8_pack_lh_config() { |
122 | 0 | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
123 | 0 | if (hardware_config == NULL) { |
124 | 0 | return NULL; |
125 | 0 | } |
126 | 0 | XNN_INIT_ONCE(x8_pack_lh); |
127 | 0 | return &x8_pack_lh_config; |
128 | 0 | } |
129 | | |
130 | 0 | static void init_x8_igemm_pack_lh_config(void) { |
131 | | #if XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
132 | | #if XNN_ENABLE_ARM_SME2 |
133 | | const struct xnn_hardware_config* hardware_config = xnn_init_hardware_config(); |
134 | | assert(hardware_config != NULL); |
135 | | if (hardware_config->arch_flags & xnn_arch_arm_sme2) { |
136 | | x8_igemm_pack_lh_config.pack_lh_for_igemm_fn = (xnn_pack_lh_igemm_ukernel_fn) xnn_x8_pack_lh_ukernel__igemm_neonsme2; |
137 | | x8_igemm_pack_lh_config.size_for_igemm_fn = (xnn_pack_lh_igemm_size_fn) xnn_x8_pack_lh_size__igemm_neonsme2; |
138 | | x8_igemm_pack_lh_config.offset_for_igemm_fn = (xnn_pack_lh_igemm_offset_fn) xnn_x8_pack_lh_offset__igemm_neonsme2; |
139 | | } |
140 | | #endif // XNN_ENABLE_ARM_SME2 |
141 | | #endif // XNN_ARCH_ARM64 && XNN_ENABLE_KLEIDIAI |
142 | 0 | x8_igemm_pack_lh_config.log2_input_element_size = 0; |
143 | 0 | x8_igemm_pack_lh_config.log2_packed_element_size = 0; |
144 | 0 | } |
145 | | |
146 | 0 | const struct xnn_pack_lh_config* xnn_init_x8_igemm_pack_lh_config() { |
147 | 0 | const struct xnn_hardware_config* hardware_config = |
148 | 0 | xnn_init_hardware_config(); |
149 | 0 | if (hardware_config == NULL) { |
150 | 0 | return NULL; |
151 | 0 | } |
152 | 0 | XNN_INIT_ONCE(x8_igemm_pack_lh); |
153 | 0 | return &x8_igemm_pack_lh_config; |
154 | 0 | } |