/src/serenity/AK/SIMDExtras.h
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> |
3 | | * |
4 | | * SPDX-License-Identifier: BSD-2-Clause |
5 | | */ |
6 | | |
7 | | #pragma once |
8 | | |
9 | | #include <AK/BitCast.h> |
10 | | #include <AK/Concepts.h> |
11 | | #include <AK/SIMD.h> |
12 | | |
13 | | namespace AK::SIMD { |
14 | | |
15 | | // SIMD Vector Expansion |
16 | | |
17 | | namespace Detail { |
18 | | |
19 | | template<SIMDVector V, typename T, size_t... Is> |
20 | | requires(SameAs<T, ElementOf<V>>) |
21 | | ALWAYS_INLINE static constexpr V expand_to_impl(T t, IndexSequence<Is...> const&) |
22 | 0 | { |
23 | 0 | return V { ((void)Is, t)... }; |
24 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Font.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Font.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Font.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: AES.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: AES.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: AES.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_jjJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_ffJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMD6DetailL14expand_to_implITkNS0_10SIMDVectorEDv4_iiJLm0ELm1ELm2ELm3EEQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES7_S4_RKNS5_15IntegerSequenceImJXspT1_EEEE |
25 | | |
26 | | } |
27 | | |
28 | | template<SIMDVector V, typename T> |
29 | | requires(SameAs<T, ElementOf<V>>) |
30 | | ALWAYS_INLINE static constexpr V expand_to(T t) |
31 | 0 | { |
32 | 0 | return Detail::expand_to_impl<V>(t, MakeIndexSequence<vector_length<V>>()); |
33 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: ImageDecoder.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: ICOLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Font.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Font.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Font.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA2.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: FuzzPNGLoader.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: Filter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: SHA1.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: HTMLCanvasElement.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: PNGWriter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_jjQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_ffQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ Unexecuted instantiation: BytecodeInterpreter.cpp:_ZN2AK4SIMDL9expand_toITkNS0_10SIMDVectorEDv4_iiQ6SameAsIT0_NS_6Detail17__RemoveReferenceIDTixcl7declvalIT_EELi0EEE4TypeEEEES6_S3_ |
34 | | |
35 | | ALWAYS_INLINE static constexpr f32x4 expand4(float f) |
36 | 0 | { |
37 | 0 | return expand_to<f32x4>(f); |
38 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: Font.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: SHA2.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: Filter.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: AES.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: SHA1.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::expand4(float) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::expand4(float) |
39 | | |
40 | | ALWAYS_INLINE static constexpr i32x4 expand4(i32 i) |
41 | 0 | { |
42 | 0 | return expand_to<i32x4>(i); |
43 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: Font.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: SHA2.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: Filter.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: AES.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: SHA1.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::expand4(int) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::expand4(int) |
44 | | |
45 | | ALWAYS_INLINE static constexpr u32x4 expand4(u32 u) |
46 | 0 | { |
47 | 0 | return expand_to<u32x4>(u); |
48 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: Font.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: SHA2.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: Filter.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: AES.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: SHA1.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::expand4(unsigned int) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::expand4(unsigned int) |
49 | | |
50 | | // Masking |
51 | | |
52 | | ALWAYS_INLINE static i32 maskbits(i32x4 mask) |
53 | 0 | { |
54 | 0 | #if defined(__SSE__) |
55 | 0 | return __builtin_ia32_movmskps((f32x4)mask); |
56 | 0 | #else |
57 | 0 | return ((mask[0] & 0x80000000) >> 31) | ((mask[1] & 0x80000000) >> 30) | ((mask[2] & 0x80000000) >> 29) | ((mask[3] & 0x80000000) >> 28); |
58 | 0 | #endif |
59 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::maskbits(int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::maskbits(int __vector(4)) |
60 | | |
61 | | ALWAYS_INLINE static bool all(i32x4 mask) |
62 | 0 | { |
63 | 0 | return maskbits(mask) == 15; |
64 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::all(int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::all(int __vector(4)) |
65 | | |
66 | | ALWAYS_INLINE static bool any(i32x4 mask) |
67 | 0 | { |
68 | 0 | return maskbits(mask) != 0; |
69 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::any(int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::any(int __vector(4)) |
70 | | |
71 | | ALWAYS_INLINE static bool none(i32x4 mask) |
72 | 0 | { |
73 | 0 | return maskbits(mask) == 0; |
74 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::none(int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::none(int __vector(4)) |
75 | | |
76 | | ALWAYS_INLINE static int maskcount(i32x4 mask) |
77 | 0 | { |
78 | 0 | constexpr static int count_lut[16] { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 }; |
79 | 0 | return count_lut[maskbits(mask)]; |
80 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::maskcount(int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::maskcount(int __vector(4)) |
81 | | |
82 | | // Load / Store |
83 | | |
84 | | template<SIMDVector VectorType> |
85 | | ALWAYS_INLINE static VectorType load_unaligned(void const* a) |
86 | 2.69M | { |
87 | 2.69M | VectorType v; |
88 | 2.69M | __builtin_memcpy(&v, a, sizeof(VectorType)); |
89 | 2.69M | return v; |
90 | 2.69M | } SHA2.cpp:_ZN2AK4SIMDL14load_unalignedITkNS0_10SIMDVectorEDv4_jEET_PKv Line | Count | Source | 86 | 2.19M | { | 87 | 2.19M | VectorType v; | 88 | 2.19M | __builtin_memcpy(&v, a, sizeof(VectorType)); | 89 | 2.19M | return v; | 90 | 2.19M | } |
Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL14load_unalignedITkNS0_10SIMDVectorEDv4_iEET_PKv Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL14load_unalignedITkNS0_10SIMDVectorEDv2_xEET_PKv Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL14load_unalignedITkNS0_10SIMDVectorEDv2_lEET_PKv SHA1.cpp:_ZN2AK4SIMDL14load_unalignedITkNS0_10SIMDVectorEDv4_jEET_PKv Line | Count | Source | 86 | 496k | { | 87 | 496k | VectorType v; | 88 | 496k | __builtin_memcpy(&v, a, sizeof(VectorType)); | 89 | 496k | return v; | 90 | 496k | } |
|
91 | | |
92 | | template<SIMDVector VectorType> |
93 | | ALWAYS_INLINE static void store_unaligned(void* a, VectorType const& v) |
94 | 298k | { |
95 | | // FIXME: Does this generate the right instructions? |
96 | 298k | __builtin_memcpy(a, &v, sizeof(VectorType)); |
97 | 298k | } SHA2.cpp:_ZN2AK4SIMDL15store_unalignedITkNS0_10SIMDVectorEDv4_jEEvPvRKT_ Line | Count | Source | 94 | 199k | { | 95 | | // FIXME: Does this generate the right instructions? | 96 | 199k | __builtin_memcpy(a, &v, sizeof(VectorType)); | 97 | 199k | } |
Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL15store_unalignedITkNS0_10SIMDVectorEDv4_iEEvPvRKT_ Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL15store_unalignedITkNS0_10SIMDVectorEDv2_xEEvPvRKT_ Unexecuted instantiation: AES.cpp:_ZN2AK4SIMDL15store_unalignedITkNS0_10SIMDVectorEDv2_lEEvPvRKT_ SHA1.cpp:_ZN2AK4SIMDL15store_unalignedITkNS0_10SIMDVectorEDv4_jEEvPvRKT_ Line | Count | Source | 94 | 99.3k | { | 95 | | // FIXME: Does this generate the right instructions? | 96 | 99.3k | __builtin_memcpy(a, &v, sizeof(VectorType)); | 97 | 99.3k | } |
|
98 | | |
99 | | ALWAYS_INLINE static f32x4 load4(float const* a, float const* b, float const* c, float const* d) |
100 | 0 | { |
101 | 0 | return f32x4 { *a, *b, *c, *d }; |
102 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: Font.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: SHA2.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: Filter.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: AES.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: SHA1.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::load4(float const*, float const*, float const*, float const*) |
103 | | |
104 | | ALWAYS_INLINE static u32x4 load4(u32 const* a, u32 const* b, u32 const* c, u32 const* d) |
105 | 0 | { |
106 | 0 | return u32x4 { *a, *b, *c, *d }; |
107 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: Font.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: SHA2.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: Filter.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: AES.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: SHA1.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::load4(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*) |
108 | | |
109 | | ALWAYS_INLINE static f32x4 load4_masked(float const* a, float const* b, float const* c, float const* d, i32x4 mask) |
110 | 0 | { |
111 | 0 | int bits = maskbits(mask); |
112 | 0 | return f32x4 { |
113 | 0 | bits & 1 ? *a : 0.f, |
114 | 0 | bits & 2 ? *b : 0.f, |
115 | 0 | bits & 4 ? *c : 0.f, |
116 | 0 | bits & 8 ? *d : 0.f, |
117 | 0 | }; |
118 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::load4_masked(float const*, float const*, float const*, float const*, int __vector(4)) |
119 | | |
120 | | ALWAYS_INLINE static i32x4 load4_masked(u8 const* a, u8 const* b, u8 const* c, u8 const* d, i32x4 mask) |
121 | 0 | { |
122 | 0 | int bits = maskbits(mask); |
123 | 0 | return i32x4 { |
124 | 0 | bits & 1 ? *a : 0, |
125 | 0 | bits & 2 ? *b : 0, |
126 | 0 | bits & 4 ? *c : 0, |
127 | 0 | bits & 8 ? *d : 0, |
128 | 0 | }; |
129 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::load4_masked(unsigned char const*, unsigned char const*, unsigned char const*, unsigned char const*, int __vector(4)) |
130 | | |
131 | | ALWAYS_INLINE static u32x4 load4_masked(u32 const* a, u32 const* b, u32 const* c, u32 const* d, i32x4 mask) |
132 | 0 | { |
133 | 0 | int bits = maskbits(mask); |
134 | 0 | return u32x4 { |
135 | 0 | bits & 1 ? *a : 0u, |
136 | 0 | bits & 2 ? *b : 0u, |
137 | 0 | bits & 4 ? *c : 0u, |
138 | 0 | bits & 8 ? *d : 0u, |
139 | 0 | }; |
140 | 0 | } Unexecuted instantiation: ImageDecoder.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: ICOLoader.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: PNGLoader.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: Font.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: SHA2.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: FuzzPNGLoader.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: Filter.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: AES.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: SHA1.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: HTMLCanvasElement.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: PNGWriter.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) Unexecuted instantiation: BytecodeInterpreter.cpp:AK::SIMD::load4_masked(unsigned int const*, unsigned int const*, unsigned int const*, unsigned int const*, int __vector(4)) |
141 | | |
142 | | template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])> |
143 | | ALWAYS_INLINE static void store4(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d) |
144 | | { |
145 | | *a = v[0]; |
146 | | *b = v[1]; |
147 | | *c = v[2]; |
148 | | *d = v[3]; |
149 | | } |
150 | | |
151 | | template<typename VectorType, typename UnderlyingType = decltype(declval<VectorType>()[0])> |
152 | | ALWAYS_INLINE static void store4_masked(VectorType v, UnderlyingType* a, UnderlyingType* b, UnderlyingType* c, UnderlyingType* d, i32x4 mask) |
153 | | { |
154 | | int bits = maskbits(mask); |
155 | | if (bits & 1) |
156 | | *a = v[0]; |
157 | | if (bits & 2) |
158 | | *b = v[1]; |
159 | | if (bits & 4) |
160 | | *c = v[2]; |
161 | | if (bits & 8) |
162 | | *d = v[3]; |
163 | | } |
164 | | |
165 | | // Shuffle |
166 | | namespace Detail { |
167 | | template<SIMDVector T, SIMDVector Control, size_t... Idx> |
168 | | ALWAYS_INLINE static T shuffle_impl(T a, Control control, IndexSequence<Idx...>) |
169 | | { |
170 | | // FIXME: Maybe make the VERIFYs optional, eg on SIMD-DEBUG, to avoid the overhead in performance oriented users, like LibWasm::SIMD |
171 | | // Note: - instead of _ to make the linter happy, as SIMD-DEBUG does not (yet) exist |
172 | | constexpr Conditional<IsSigned<ElementOf<Control>>, ssize_t, size_t> N = vector_length<T>; |
173 | | // If you hit this verify and want a 0 in these cases instead, use shuffle_or_0 |
174 | | (([control] { VERIFY(control[Idx] < N); })(), ...); |
175 | | |
176 | | // __builtin_shuffle is only available with GCC, and has quite good codegen |
177 | | if constexpr (__has_builtin(__builtin_shuffle)) |
178 | | return __builtin_shuffle(a, control); |
179 | | |
180 | | return T { |
181 | | a[control[Idx]]... |
182 | | }; |
183 | | } |
184 | | |
185 | | // FIXME: AppleClang somehow unconditionally executes the `a[control[Idx]]` path, |
186 | | // even if its in the false branch of the ternary |
187 | | // This leads to a presumably out of bounds access, which is UB |
188 | | // Reenable the sanitizer once this is fixed |
189 | | // As a side note UBsan makes a total mess of the codegen anyway |
190 | | template<SIMDVector T, SIMDVector Control, size_t... Idx> |
191 | | #ifdef AK_COMPILER_CLANG |
192 | | [[clang::no_sanitize("undefined")]] |
193 | | #endif |
194 | | ALWAYS_INLINE static T shuffle_or_0_impl(T a, Control control, IndexSequence<Idx...>) |
195 | 0 | { |
196 | 0 | constexpr Conditional<IsSigned<ElementOf<Control>>, ssize_t, size_t> N = vector_length<T>; |
197 | 0 | using E = ElementOf<T>; |
198 | |
|
199 | | if constexpr (__has_builtin(__builtin_shuffle)) { |
200 | | auto vector = __builtin_shuffle(a, control); |
201 | | for (size_t i = 0; i < N; ++i) |
202 | | vector[i] = control[i] < 0 || control[i] >= N ? 0 : vector[i]; |
203 | | return vector; |
204 | | } |
205 | | // 1. Set all out of bounds values to ~0 |
206 | | // Note: This is done so that the optimization mentioned down below works |
207 | | // Note: Vector compares result in bitmasks, aka all 1s or all 0s per element |
208 | 0 | control |= ~((control >= 0) & (control < N)); |
209 | | // 2. Selectively set out of bounds values to 0 |
210 | | // Note: Clang successfully optimizes this to a few instructions on x86-ssse3, GCC does not |
211 | | // Vector Optimizations/Instruction-Selection on ArmV8 seem to not be as powerful as of Clang18 |
212 | | // FIXME: We could recreate the bit mask Clang uses for the select for u32 and u16 |
213 | | // control = control * explode_byte(sizeof(E)) + 0x03020100; |
214 | | // return (T)shuffle_unchecked(Bytes(a), Bytes(control)); |
215 | | // Note: On x86-ssse3, `pshufb` inserts a zero if the control byte has the highest bit set |
216 | | // On ArmV8, `tbl` inserts a zero if the control byte is out of bounds in general |
217 | | // On RiscV `vrgather.vv` inserts a 0 if the control index is out of bounds |
218 | | // and is more powerful than the other two as it is able to use bigger item widths than a byte |
219 | | // Note: For u64x2 Clang seems to always unroll the compare instead of doing the fancy `phufb` |
220 | |
|
221 | 0 | return T { |
222 | 0 | ((E)(control[Idx] != ~0 ? a[control[Idx]] : 0))... |
223 | 0 | }; |
224 | 0 | } |
225 | | |
226 | | template<SIMDVector T, size_t... Idx> |
227 | | ALWAYS_INLINE static T item_reverse_impl(T a, IndexSequence<Idx...>) |
228 | 198k | { |
229 | 198k | constexpr size_t N = vector_length<T>; |
230 | 198k | return __builtin_shufflevector(a, a, N - 1 - Idx...); |
231 | 198k | } |
232 | | |
233 | | template<SIMDVector T, size_t... Idx> |
234 | | ALWAYS_INLINE static T byte_reverse_impl(T a, IndexSequence<Idx...>) |
235 | 397k | { |
236 | 397k | static_assert(sizeof...(Idx) == sizeof(T)); |
237 | 397k | constexpr size_t N = sizeof(T); |
238 | | // FIXME: GCC silently ignores the dependent vector_size attribute, this seems to be a bug |
239 | | // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68703 |
240 | | // Hence this giant conditional |
241 | 397k | using BytesVector = Conditional<sizeof(T) == 2, u8x2, Conditional<sizeof(T) == 4, u8x4, Conditional<sizeof(T) == 8, u8x8, Conditional<sizeof(T) == 16, u8x16, Conditional<sizeof(T) == 32, u8x32, void>>>>>; |
242 | 397k | static_assert(sizeof(BytesVector) == sizeof(T)); |
243 | 397k | return bit_cast<T>( |
244 | 397k | __builtin_shufflevector( |
245 | 397k | bit_cast<BytesVector>(a), |
246 | 397k | bit_cast<BytesVector>(a), |
247 | 397k | N - 1 - Idx...)); |
248 | 397k | } |
249 | | |
250 | | template<SIMDVector T, size_t... Idx> |
251 | | ALWAYS_INLINE static T elementwise_byte_reverse_impl(T a, IndexSequence<Idx...>) |
252 | 399k | { |
253 | 399k | static_assert(sizeof...(Idx) == vector_length<T>); |
254 | 399k | using Element = ElementOf<T>; |
255 | | if constexpr (sizeof(Element) == 1) { |
256 | | return a; |
257 | | } else if constexpr (sizeof(Element) == 2) { |
258 | | return T { |
259 | | static_cast<Element>(__builtin_bswap16(static_cast<u16>(a[Idx])))... |
260 | | }; |
261 | 399k | } else if constexpr (sizeof(Element) == 4) { |
262 | 399k | return T { |
263 | 399k | static_cast<Element>(__builtin_bswap32(static_cast<u32>(a[Idx])))... |
264 | 399k | }; |
265 | | } else if constexpr (sizeof(Element) == 8) { |
266 | | return T { |
267 | | static_cast<Element>(__builtin_bswap64(static_cast<u64>(a[Idx])))... |
268 | | }; |
269 | | } else { |
270 | | static_assert(DependentFalse<T>); |
271 | | } |
272 | 399k | } |
273 | | |
274 | | } |
275 | | |
276 | | // FIXME: Shuffles only work with integral types for now |
277 | | template<SIMDVector T> |
278 | | ALWAYS_INLINE static T shuffle(T a, IndexVectorFor<T> control) |
279 | | { |
280 | | return Detail::shuffle_impl(a, control, MakeIndexSequence<vector_length<T>>()); |
281 | | } |
282 | | |
283 | | template<SIMDVector T> |
284 | | ALWAYS_INLINE static T shuffle_or_0(T a, IndexVectorFor<T> control) |
285 | 0 | { |
286 | 0 | return Detail::shuffle_or_0_impl(a, control, MakeIndexSequence<vector_length<T>>()); |
287 | 0 | } |
288 | | |
289 | | template<SIMDVector T> |
290 | | ALWAYS_INLINE static T item_reverse(T a) |
291 | 198k | { |
292 | 198k | return Detail::item_reverse_impl(a, MakeIndexSequence<vector_length<T>>()); |
293 | 198k | } |
294 | | |
295 | | template<SIMDVector T> |
296 | | ALWAYS_INLINE static T byte_reverse(T a) |
297 | 397k | { |
298 | 397k | return Detail::byte_reverse_impl(a, MakeIndexSequence<sizeof(T)>()); |
299 | 397k | } |
300 | | |
301 | | template<SIMDVector T> |
302 | | ALWAYS_INLINE static T elementwise_byte_reverse(T a) |
303 | 399k | { |
304 | 399k | return Detail::elementwise_byte_reverse_impl(a, MakeIndexSequence<vector_length<T>>()); |
305 | 399k | } |
306 | | |
307 | | } |