/proc/self/cwd/external/gemmlowp/internal/compute.h
Line | Count | Source |
1 | | // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // compute.h: the central stage of the Gemm computation, operates |
16 | | // on already-packed LHS and RHS blocks and calls the Gemm kernel |
17 | | // to compute a block of the product. |
18 | | |
19 | | #ifndef GEMMLOWP_INTERNAL_COMPUTE_H_ |
20 | | #define GEMMLOWP_INTERNAL_COMPUTE_H_ |
21 | | |
22 | | #include "block_params.h" |
23 | | #include "kernel.h" |
24 | | #include "pack.h" |
25 | | |
26 | | namespace gemmlowp { |
27 | | |
28 | | template <typename PackedLhs, typename PackedRhs, typename PackedResult> |
29 | | class ComputeImpl { |
30 | | typedef typename PackedLhs::KernelSideFormat KernelLhsFormat; |
31 | | typedef typename PackedRhs::KernelSideFormat KernelRhsFormat; |
32 | | typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format; |
33 | | |
34 | | const KernelBase& kernel_; |
35 | | const BlockParams& block_params_; |
36 | | |
37 | | PackedResult* const packed_result_; |
38 | | const PackedLhs& packed_lhs_; |
39 | | const PackedRhs& packed_rhs_; |
40 | | |
41 | | public: |
42 | | ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params, |
43 | | PackedResult* _packed_result, const PackedLhs& _packed_lhs, |
44 | | const PackedRhs& _packed_rhs) |
45 | | : kernel_(_kernel), |
46 | | block_params_(_block_params), |
47 | | packed_result_(_packed_result), |
48 | | packed_lhs_(_packed_lhs), |
49 | 0 | packed_rhs_(_packed_rhs) {} |
50 | | |
51 | 0 | void Compute(int depth) { |
52 | 0 | depth = RoundUp<Format::kDepth>(depth); |
53 | 0 | assert(depth <= block_params_.l2_depth); |
54 | 0 | for (int d = 0; d < depth; d += block_params_.l1_depth) { |
55 | 0 | int ds = std::min(block_params_.l1_depth, depth - d); |
56 | |
57 | 0 | for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) { |
58 | 0 | int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r); |
59 | |
60 | 0 | ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds); |
61 | 0 | } |
62 | 0 | } |
63 | 0 | } |
64 | | |
65 | | private: |
66 | | static void MarkPackedResultBlockAsInitialized( |
67 | 0 | const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) { |
68 | | #ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED |
69 | | for (int col = 0; col < packed_result_block.cols(); col++) { |
70 | | MarkMemoryAsInitialized( |
71 | | packed_result_block.data() + col * packed_result_block.cols_stride(), |
72 | | packed_result_block.rows()); |
73 | | } |
74 | | #else |
75 | 0 | (void)packed_result_block; |
76 | 0 | #endif |
77 | 0 | } |
78 | | |
79 | | void ComputeRun(int start_row, int start_col, int start_depth, |
80 | 0 | int depth) GEMMLOWP_NOINLINE { |
81 | 0 | packed_lhs_.seek_run(start_row, start_depth); |
82 | 0 | packed_rhs_.seek_run(start_col, start_depth); |
83 | 0 | auto packed_result_block = packed_result_->Map().block( |
84 | 0 | start_row, start_col, Format::kRows, Format::kCols); |
85 | 0 | kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(), |
86 | 0 | packed_result_block.cols_stride(), packed_lhs_.current_data(), |
87 | 0 | packed_rhs_.current_data(), start_depth, depth); |
88 | 0 | MarkPackedResultBlockAsInitialized(packed_result_block); |
89 | 0 | } |
90 | | |
91 | | void ComputeL1(int start_row, int rows, int start_col, int cols, |
92 | 0 | int start_depth, int depth) { |
93 | 0 | assert(rows % Format::kRows == 0); |
94 | 0 | assert(cols % Format::kCols == 0); |
95 | 0 | assert(depth % Format::kDepth == 0); |
96 | | |
97 | 0 | for (int c = 0; c < cols; c += Format::kCols) { |
98 | 0 | for (int r = 0; r < rows; r += Format::kRows) { |
99 | 0 | ComputeRun(start_row + r, start_col + c, start_depth, depth); |
100 | 0 | } |
101 | 0 | } |
102 | 0 | } |
103 | | }; |
104 | | |
105 | | template <typename PackedLhs, typename PackedRhs, typename PackedResult> |
106 | | void Compute(const KernelBase& kernel, const BlockParams& block_params, |
107 | | PackedResult* packed_result, const PackedLhs& packed_lhs, |
108 | 0 | const PackedRhs& packed_rhs, int depth) { |
109 | 0 | ScopedProfilingLabel label("compute"); |
110 | 0 | ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl( |
111 | 0 | kernel, block_params, packed_result, packed_lhs, packed_rhs); |
112 | |
113 | 0 | impl.Compute(depth); |
114 | 0 | } |
115 | | |
116 | | } // namespace gemmlowp |
117 | | |
118 | | #endif // GEMMLOWP_INTERNAL_COMPUTE_H_ |
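
For orientation, the loop nest in ComputeImpl::Compute / ComputeL1 / ComputeRun above amounts to: split the packed L2-sized block into L1-sized sub-blocks along the depth and row dimensions, then walk kernel-format tiles (Format::kRows x Format::kCols) across each sub-block and invoke the kernel once per tile. Below is a minimal, self-contained C++ sketch of that blocking order only; ToyBlockParams, ToyCompute and the kKernel* constants are hypothetical stand-ins, not gemmlowp API, and the printf stands in for the real kernel_.Run call on the packed result block.

#include <algorithm>
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for gemmlowp's BlockParams: outer (L2) block
// dimensions plus the inner (L1) sub-block sizes used for traversal.
struct ToyBlockParams {
  int l2_rows, l2_cols, l2_depth;
  int l1_rows, l1_depth;
};

// Hypothetical stand-ins for Format::kRows / Format::kCols.
constexpr int kKernelRows = 4;
constexpr int kKernelCols = 4;

// Mirrors the traversal order of ComputeImpl::Compute + ComputeL1:
// L1 sub-blocks over depth and rows, then kernel tiles over rows x cols.
void ToyCompute(const ToyBlockParams& p, int depth) {
  assert(depth <= p.l2_depth);
  for (int d = 0; d < depth; d += p.l1_depth) {
    int ds = std::min(p.l1_depth, depth - d);
    for (int r = 0; r < p.l2_rows; r += p.l1_rows) {
      int rs = std::min(p.l1_rows, p.l2_rows - r);
      // ComputeL1 equivalent: walk kernel-sized tiles over all columns.
      for (int c = 0; c < p.l2_cols; c += kKernelCols) {
        for (int rr = 0; rr < rs; rr += kKernelRows) {
          // ComputeRun equivalent: one kernel invocation per tile,
          // covering depth slice [d, d + ds).
          std::printf("kernel tile: row=%d col=%d depth=[%d,%d)\n",
                      r + rr, c, d, d + ds);
        }
      }
    }
  }
}

int main() {
  ToyBlockParams p{8, 8, 4, 4, 2};
  ToyCompute(p, 4);
  return 0;
}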