Coverage Report

Created: 2024-05-04 12:45

/proc/self/cwd/external/gemmlowp/internal/compute.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
2
//
3
// Licensed under the Apache License, Version 2.0 (the "License");
4
// you may not use this file except in compliance with the License.
5
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
// Unless required by applicable law or agreed to in writing, software
10
// distributed under the License is distributed on an "AS IS" BASIS,
11
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
// See the License for the specific language governing permissions and
13
// limitations under the License.
14
15
// compute.h: the central stage of the Gemm computation, operates
16
// on already-packed LHS and RHS blocks and calls the Gemm kernel
17
// to compute a block of the product.
18
19
#ifndef GEMMLOWP_INTERNAL_COMPUTE_H_
20
#define GEMMLOWP_INTERNAL_COMPUTE_H_
21
22
#include "block_params.h"
23
#include "kernel.h"
24
#include "pack.h"
25
26
namespace gemmlowp {
27
28
template <typename PackedLhs, typename PackedRhs, typename PackedResult>
29
class ComputeImpl {
30
  typedef typename PackedLhs::KernelSideFormat KernelLhsFormat;
31
  typedef typename PackedRhs::KernelSideFormat KernelRhsFormat;
32
  typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format;
33
34
  const KernelBase& kernel_;
35
  const BlockParams& block_params_;
36
37
  PackedResult* const packed_result_;
38
  const PackedLhs& packed_lhs_;
39
  const PackedRhs& packed_rhs_;
40
41
 public:
42
  ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params,
43
              PackedResult* _packed_result, const PackedLhs& _packed_lhs,
44
              const PackedRhs& _packed_rhs)
45
      : kernel_(_kernel),
46
        block_params_(_block_params),
47
        packed_result_(_packed_result),
48
        packed_lhs_(_packed_lhs),
49
0
        packed_rhs_(_packed_rhs) {}
50
51
0
  void Compute(int depth) {
52
0
    depth = RoundUp<Format::kDepth>(depth);
53
0
    assert(depth <= block_params_.l2_depth);
54
0
    for (int d = 0; d < depth; d += block_params_.l1_depth) {
55
0
      int ds = std::min(block_params_.l1_depth, depth - d);
56
57
0
      for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) {
58
0
        int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r);
59
60
0
        ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds);
61
0
      }
62
0
    }
63
0
  }
64
65
 private:
66
  static void MarkPackedResultBlockAsInitialized(
67
0
      const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) {
68
#ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED
69
    for (int col = 0; col < packed_result_block.cols(); col++) {
70
      MarkMemoryAsInitialized(
71
          packed_result_block.data() + col * packed_result_block.cols_stride(),
72
          packed_result_block.rows());
73
    }
74
#else
75
0
    (void)packed_result_block;
76
0
#endif
77
0
  }
78
79
  void ComputeRun(int start_row, int start_col, int start_depth,
80
0
                  int depth) GEMMLOWP_NOINLINE {
81
0
    packed_lhs_.seek_run(start_row, start_depth);
82
0
    packed_rhs_.seek_run(start_col, start_depth);
83
0
    auto packed_result_block = packed_result_->Map().block(
84
0
        start_row, start_col, Format::kRows, Format::kCols);
85
0
    kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(),
86
0
                packed_result_block.cols_stride(), packed_lhs_.current_data(),
87
0
                packed_rhs_.current_data(), start_depth, depth);
88
0
    MarkPackedResultBlockAsInitialized(packed_result_block);
89
0
  }
90
91
  void ComputeL1(int start_row, int rows, int start_col, int cols,
92
0
                 int start_depth, int depth) {
93
0
    assert(rows % Format::kRows == 0);
94
0
    assert(cols % Format::kCols == 0);
95
0
    assert(depth % Format::kDepth == 0);
96
97
0
    for (int c = 0; c < cols; c += Format::kCols) {
98
0
      for (int r = 0; r < rows; r += Format::kRows) {
99
0
        ComputeRun(start_row + r, start_col + c, start_depth, depth);
100
0
      }
101
0
    }
102
0
  }
103
};
104
105
template <typename PackedLhs, typename PackedRhs, typename PackedResult>
106
void Compute(const KernelBase& kernel, const BlockParams& block_params,
107
             PackedResult* packed_result, const PackedLhs& packed_lhs,
108
0
             const PackedRhs& packed_rhs, int depth) {
109
0
  ScopedProfilingLabel label("compute");
110
0
  ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl(
111
0
      kernel, block_params, packed_result, packed_lhs, packed_rhs);
112
113
0
  impl.Compute(depth);
114
0
}
115
116
}  // namespace gemmlowp
117
118
#endif  // GEMMLOWP_INTERNAL_COMPUTE_H_