/proc/self/cwd/external/gemmlowp/internal/compute.h
Line | Count | Source |
1 | | // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // compute.h: the central stage of the Gemm computation, operates |
16 | | // on already-packed LHS and RHS blocks and calls the Gemm kernel |
17 | | // to compute a block of the product. |
18 | | |
19 | | #ifndef GEMMLOWP_INTERNAL_COMPUTE_H_ |
20 | | #define GEMMLOWP_INTERNAL_COMPUTE_H_ |
21 | | |
22 | | #include "block_params.h" |
23 | | #include "kernel.h" |
24 | | #include "pack.h" |
25 | | |
26 | | namespace gemmlowp { |
27 | | |
28 | | template <typename PackedLhs, typename PackedRhs, typename PackedResult> |
29 | | class ComputeImpl { |
30 | | typedef typename PackedLhs::KernelSideFormat KernelLhsFormat; |
31 | | typedef typename PackedRhs::KernelSideFormat KernelRhsFormat; |
32 | | typedef KernelFormat<KernelLhsFormat, KernelRhsFormat> Format; |
33 | | |
34 | | const KernelBase& kernel_; |
35 | | const BlockParams& block_params_; |
36 | | |
37 | | PackedResult* const packed_result_; |
38 | | const PackedLhs& packed_lhs_; |
39 | | const PackedRhs& packed_rhs_; |
40 | | |
41 | | public: |
42 | | ComputeImpl(const KernelBase& _kernel, const BlockParams& _block_params, |
43 | | PackedResult* _packed_result, const PackedLhs& _packed_lhs, |
44 | | const PackedRhs& _packed_rhs) |
45 | | : kernel_(_kernel), |
46 | | block_params_(_block_params), |
47 | | packed_result_(_packed_result), |
48 | | packed_lhs_(_packed_lhs), |
49 | 0 | packed_rhs_(_packed_rhs) {} |
50 | | |
51 | 0 | void Compute(int depth) { |
52 | 0 | depth = RoundUp<Format::kDepth>(depth); |
53 | 0 | assert(depth <= block_params_.l2_depth); |
54 | 0 | for (int d = 0; d < depth; d += block_params_.l1_depth) { |
55 | 0 | int ds = std::min(block_params_.l1_depth, depth - d); |
56 | |
57 | 0 | for (int r = 0; r < block_params_.l2_rows; r += block_params_.l1_rows) { |
58 | 0 | int rs = std::min(block_params_.l1_rows, block_params_.l2_rows - r); |
59 | |
60 | 0 | ComputeL1(r, rs, 0, block_params_.l2_cols, d, ds); |
61 | 0 | } |
62 | 0 | } |
63 | 0 | } |
64 | | |
65 | | private: |
66 | | static void MarkPackedResultBlockAsInitialized( |
67 | 0 | const MatrixMap<std::int32_t, MapOrder::ColMajor>& packed_result_block) { |
68 | | #ifdef GEMMLOWP_MARK_MEMORY_AS_INITIALIZED |
69 | | for (int col = 0; col < packed_result_block.cols(); col++) { |
70 | | MarkMemoryAsInitialized( |
71 | | packed_result_block.data() + col * packed_result_block.cols_stride(), |
72 | | packed_result_block.rows()); |
73 | | } |
74 | | #else |
75 | 0 | (void)packed_result_block; |
76 | 0 | #endif |
77 | 0 | } |
78 | | |
79 | | void ComputeRun(int start_row, int start_col, int start_depth, |
80 | 0 | int depth) GEMMLOWP_NOINLINE { |
81 | 0 | packed_lhs_.seek_run(start_row, start_depth); |
82 | 0 | packed_rhs_.seek_run(start_col, start_depth); |
83 | 0 | auto packed_result_block = packed_result_->Map().block( |
84 | 0 | start_row, start_col, Format::kRows, Format::kCols); |
85 | 0 | kernel_.Run(packed_result_block.data(), packed_result_block.rows_stride(), |
86 | 0 | packed_result_block.cols_stride(), packed_lhs_.current_data(), |
87 | 0 | packed_rhs_.current_data(), start_depth, depth); |
88 | 0 | MarkPackedResultBlockAsInitialized(packed_result_block); |
89 | 0 | } |
90 | | |
91 | | void ComputeL1(int start_row, int rows, int start_col, int cols, |
92 | 0 | int start_depth, int depth) { |
93 | 0 | assert(rows % Format::kRows == 0); |
94 | 0 | assert(cols % Format::kCols == 0); |
95 | 0 | assert(depth % Format::kDepth == 0); |
96 | | |
97 | 0 | for (int c = 0; c < cols; c += Format::kCols) { |
98 | 0 | for (int r = 0; r < rows; r += Format::kRows) { |
99 | 0 | ComputeRun(start_row + r, start_col + c, start_depth, depth); |
100 | 0 | } |
101 | 0 | } |
102 | 0 | } |
103 | | }; |
104 | | |
105 | | template <typename PackedLhs, typename PackedRhs, typename PackedResult> |
106 | | void Compute(const KernelBase& kernel, const BlockParams& block_params, |
107 | | PackedResult* packed_result, const PackedLhs& packed_lhs, |
108 | 0 | const PackedRhs& packed_rhs, int depth) { |
109 | 0 | ScopedProfilingLabel label("compute"); |
110 | 0 | ComputeImpl<PackedLhs, PackedRhs, PackedResult> impl( |
111 | 0 | kernel, block_params, packed_result, packed_lhs, packed_rhs); |
112 | |
113 | 0 | impl.Compute(depth); |
114 | 0 | } |
115 | | |
116 | | } // namespace gemmlowp |
117 | | |
118 | | #endif // GEMMLOWP_INTERNAL_COMPUTE_H_ |
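
For orientation, the loop nest in ComputeImpl::Compute / ComputeL1 / ComputeRun above amounts to: split the packed L2-sized block into L1-sized sub-blocks along the depth and row dimensions, then walk kernel-format tiles (Format::kRows x Format::kCols) across each sub-block and invoke the kernel once per tile. Below is a minimal, self-contained C++ sketch of that blocking order only; ToyBlockParams, ToyCompute and the kKernel* constants are hypothetical stand-ins, not gemmlowp API, and the printf stands in for the real kernel_.Run call on the packed result block.

#include <algorithm>
#include <cassert>
#include <cstdio>

// Hypothetical stand-in for gemmlowp's BlockParams: outer (L2) block
// dimensions plus the inner (L1) sub-block sizes used for traversal.
struct ToyBlockParams {
  int l2_rows, l2_cols, l2_depth;
  int l1_rows, l1_depth;
};

// Hypothetical stand-ins for Format::kRows / Format::kCols.
constexpr int kKernelRows = 4;
constexpr int kKernelCols = 4;

// Mirrors the traversal order of ComputeImpl::Compute + ComputeL1:
// L1 sub-blocks over depth and rows, then kernel tiles over rows x cols.
void ToyCompute(const ToyBlockParams& p, int depth) {
  assert(depth <= p.l2_depth);
  for (int d = 0; d < depth; d += p.l1_depth) {
    int ds = std::min(p.l1_depth, depth - d);
    for (int r = 0; r < p.l2_rows; r += p.l1_rows) {
      int rs = std::min(p.l1_rows, p.l2_rows - r);
      // ComputeL1 equivalent: walk kernel-sized tiles over all columns.
      for (int c = 0; c < p.l2_cols; c += kKernelCols) {
        for (int rr = 0; rr < rs; rr += kKernelRows) {
          // ComputeRun equivalent: one kernel invocation per tile,
          // covering depth slice [d, d + ds).
          std::printf("kernel tile: row=%d col=%d depth=[%d,%d)\n",
                      r + rr, c, d, d + ds);
        }
      }
    }
  }
}

int main() {
  ToyBlockParams p{8, 8, 4, 4, 2};
  ToyCompute(p, 4);
  return 0;
}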