Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/simd_util.cc
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/simd_util.h"
7
8
#include <cstddef>
9
#include <cstdint>
10
11
#include "lib/jxl/base/compiler_specific.h"
12
13
#undef HWY_TARGET_INCLUDE
14
#define HWY_TARGET_INCLUDE "lib/jxl/simd_util.cc"
15
#include <hwy/foreach_target.h>
16
#include <hwy/highway.h>
17
18
HWY_BEFORE_NAMESPACE();
19
namespace jxl {
20
namespace HWY_NAMESPACE {
21
22
using hwy::HWY_NAMESPACE::GetLane;
23
using hwy::HWY_NAMESPACE::IfThenElseZero;
24
using hwy::HWY_NAMESPACE::Iota;
25
using hwy::HWY_NAMESPACE::LoadU;
26
using hwy::HWY_NAMESPACE::Lt;
27
using hwy::HWY_NAMESPACE::Max;
28
using hwy::HWY_NAMESPACE::MaxOfLanes;
29
using hwy::HWY_NAMESPACE::Set;
30
31
40.2M
size_t MaxVectorSize() {
32
40.2M
  HWY_FULL(float) df;
33
40.2M
  return Lanes(df) * sizeof(float);
34
40.2M
}
Unexecuted instantiation: jxl::N_SSE4::MaxVectorSize()
jxl::N_AVX2::MaxVectorSize()
Line
Count
Source
31
40.2M
// Coverage re-listing of the jxl::N_AVX2 instantiation of MaxVectorSize.
// Returns the lane count of the HWY_FULL(float) tag multiplied by
// sizeof(float), i.e. the vector size in bytes for this target.
size_t MaxVectorSize() {
32
40.2M
  HWY_FULL(float) df;
33
40.2M
  return Lanes(df) * sizeof(float);
34
40.2M
}
Unexecuted instantiation: jxl::N_AVX3::MaxVectorSize()
Unexecuted instantiation: jxl::N_AVX3_ZEN4::MaxVectorSize()
Unexecuted instantiation: jxl::N_AVX3_SPR::MaxVectorSize()
Unexecuted instantiation: jxl::N_SSE2::MaxVectorSize()
35
36
290k
uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) {
37
290k
  HWY_FULL(uint32_t) du;
38
290k
  size_t last_full = Lanes(du) * (len / Lanes(du));
39
290k
  auto max = Set(du, 0);
40
8.49M
  for (size_t i = 0; i < last_full; i += Lanes(du)) {
41
8.20M
    max = Max(max, LoadU(du, data + i));
42
8.20M
  }
43
290k
  if (last_full < len) {
44
88.5k
    const auto stop = Set(du, len);
45
88.5k
    const auto fence = Iota(du, last_full);
46
88.5k
    const auto take = Lt(fence, stop);
47
88.5k
    max = Max(max, IfThenElseZero(take, LoadU(du, data + last_full)));
48
88.5k
  }
49
290k
  return GetLane(MaxOfLanes(du, max));
50
290k
}
Unexecuted instantiation: jxl::N_SSE4::MaxValue(unsigned int*, unsigned long)
jxl::N_AVX2::MaxValue(unsigned int*, unsigned long)
Line
Count
Source
36
290k
// Coverage re-listing of the jxl::N_AVX2 instantiation of MaxValue.
// Returns the largest of the first `len` values in `data` (0 when `len` is 0):
// complete vectors are folded with Max, then a trailing partial vector is
// loaded in full and its lanes at index >= len are zeroed before folding.
// NOTE(review): the tail LoadU reads a whole vector starting at
// data + last_full, i.e. past data + len; presumably callers guarantee that
// memory is readable - confirm.
uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) {
37
290k
  HWY_FULL(uint32_t) du;
38
290k
  size_t last_full = Lanes(du) * (len / Lanes(du));
39
290k
  auto max = Set(du, 0);
40
8.49M
  for (size_t i = 0; i < last_full; i += Lanes(du)) {
41
8.20M
    max = Max(max, LoadU(du, data + i));
42
8.20M
  }
43
290k
  if (last_full < len) {
44
88.5k
    const auto stop = Set(du, len);
45
88.5k
    const auto fence = Iota(du, last_full);
46
88.5k
    const auto take = Lt(fence, stop);
47
88.5k
    max = Max(max, IfThenElseZero(take, LoadU(du, data + last_full)));
48
88.5k
  }
49
290k
  return GetLane(MaxOfLanes(du, max));
50
290k
}
Unexecuted instantiation: jxl::N_AVX3::MaxValue(unsigned int*, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_ZEN4::MaxValue(unsigned int*, unsigned long)
Unexecuted instantiation: jxl::N_AVX3_SPR::MaxValue(unsigned int*, unsigned long)
Unexecuted instantiation: jxl::N_SSE2::MaxValue(unsigned int*, unsigned long)
51
52
// NOLINTNEXTLINE(google-readability-namespace-comments)
53
}  // namespace HWY_NAMESPACE
54
}  // namespace jxl
55
HWY_AFTER_NAMESPACE();
56
57
#if HWY_ONCE
58
namespace jxl {
59
60
HWY_EXPORT(MaxVectorSize);
61
HWY_EXPORT(MaxValue);
62
63
40.2M
size_t MaxVectorSize() {
64
  // Ideally HWY framework should provide us this value.
65
  // Less than ideal is to check all available targets and choose maximal.
66
  // As for now, we just ask current active target, assuming it won't change.
67
40.2M
  return HWY_DYNAMIC_DISPATCH(MaxVectorSize)();
68
40.2M
}
69
70
290k
uint32_t MaxValue(uint32_t* JXL_RESTRICT data, size_t len) {
71
290k
  return HWY_DYNAMIC_DISPATCH(MaxValue)(data, len);
72
290k
}
73
74
}  // namespace jxl
75
#endif