Coverage Report

Created: 2025-07-11 06:21

/src/xz/src/liblzma/lzma/fastpos.h
Line
Count
Source (jump to first uncovered line)
1
// SPDX-License-Identifier: 0BSD
2
3
///////////////////////////////////////////////////////////////////////////////
4
//
5
/// \file       fastpos.h
6
/// \brief      Kind of two-bit version of bit scan reverse
7
///
8
//  Authors:    Igor Pavlov
9
//              Lasse Collin
10
//
11
///////////////////////////////////////////////////////////////////////////////
12
13
#ifndef LZMA_FASTPOS_H
14
#define LZMA_FASTPOS_H
15
16
// LZMA encodes match distances by storing the highest two bits using
17
// a six-bit value [0, 63], and then the missing lower bits.
18
// Dictionary size is also stored using this encoding in the .xz
19
// file format header.
20
//
21
// fastpos.h provides a way to quickly find out the correct six-bit
22
// values. The following table gives some examples of this encoding:
23
//
24
//     dist   return
25
//       0       0
26
//       1       1
27
//       2       2
28
//       3       3
29
//       4       4
30
//       5       4
31
//       6       5
32
//       7       5
33
//       8       6
34
//      11       6
35
//      12       7
36
//     ...      ...
37
//      15       7
38
//      16       8
39
//      17       8
40
//     ...      ...
41
//      23       8
42
//      24       9
43
//      25       9
44
//     ...      ...
45
//
46
//
47
// Provided functions or macros
48
// ----------------------------
49
//
50
// get_dist_slot(dist) is the basic version. get_dist_slot_2(dist)
51
// assumes that dist >= FULL_DISTANCES, thus the result is at least
52
// FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of
53
// get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist)
54
// should be tiny bit faster due to the assumption being made.
55
//
56
//
57
// Size vs. speed
58
// --------------
59
//
60
// With some CPUs that have fast BSR (bit scan reverse) instruction, the
61
// size optimized version is slightly faster than the bigger table based
62
// approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III
63
// and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that
64
// would still have speed roughly comparable to the table version. Older
65
// x86 CPUs like the original Pentium have very slow BSR; on those systems
66
// the table version is a lot faster.
67
//
68
// On some CPUs, the table version is a lot faster when using position
69
// dependent code, but with position independent code the size optimized
70
// version is slightly faster. This occurs at least on 32-bit SPARC (no
71
// ASM optimizations).
72
//
73
// I'm making the table version the default, because that has good speed
74
// on all systems I have tried. The size optimized version is sometimes
75
// slightly faster, but sometimes it is a lot slower.
76
77
#ifdef HAVE_SMALL
78
# define get_dist_slot(dist) \
79
    ((dist) <= 4 ? (dist) : get_dist_slot_2(dist))
80
81
static inline uint32_t
82
get_dist_slot_2(uint32_t dist)
83
{
84
  const uint32_t i = bsr32(dist);
85
  return (i + i) + ((dist >> (i - 1)) & 1);
86
}
87
88
89
#else
90
91
6.23M
#define FASTPOS_BITS 13
92
93
lzma_attr_visibility_hidden
94
extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS];
95
96
97
#define fastpos_shift(extra, n) \
98
4.49M
  ((extra) + (n) * (FASTPOS_BITS - 1))
99
100
#define fastpos_limit(extra, n) \
101
1.73M
  (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n)))
102
103
#define fastpos_result(dist, extra, n) \
104
1.38M
  (uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \
105
1.38M
      + 2 * fastpos_shift(extra, n)
106
107
108
static inline uint32_t
109
get_dist_slot(uint32_t dist)
110
354k
{
111
  // If it is small enough, we can pick the result directly from
112
  // the precalculated table.
113
354k
  if (dist < fastpos_limit(0, 0))
114
350k
    return lzma_fastpos[dist];
115
116
3.67k
  if (dist < fastpos_limit(0, 1))
117
3.67k
    return fastpos_result(dist, 0, 1);
118
119
0
  return fastpos_result(dist, 0, 2);
120
3.67k
}
lzma_encoder.c:get_dist_slot
Line
Count
Source
110
54.7k
{
111
  // If it is small enough, we can pick the result directly from
112
  // the precalculated table.
113
54.7k
  if (dist < fastpos_limit(0, 0))
114
54.7k
    return lzma_fastpos[dist];
115
116
0
  if (dist < fastpos_limit(0, 1))
117
0
    return fastpos_result(dist, 0, 1);
118
119
0
  return fastpos_result(dist, 0, 2);
120
0
}
lzma_encoder_optimum_normal.c:get_dist_slot
Line
Count
Source
110
295k
{
111
  // If it is small enough, we can pick the result directly from
112
  // the precalculated table.
113
295k
  if (dist < fastpos_limit(0, 0))
114
295k
    return lzma_fastpos[dist];
115
116
0
  if (dist < fastpos_limit(0, 1))
117
0
    return fastpos_result(dist, 0, 1);
118
119
0
  return fastpos_result(dist, 0, 2);
120
0
}
Unexecuted instantiation: fastpos_table.c:get_dist_slot
lzma2_encoder.c:get_dist_slot
Line
Count
Source
110
3.67k
{
111
  // If it is small enough, we can pick the result directly from
112
  // the precalculated table.
113
3.67k
  if (dist < fastpos_limit(0, 0))
114
0
    return lzma_fastpos[dist];
115
116
3.67k
  if (dist < fastpos_limit(0, 1))
117
3.67k
    return fastpos_result(dist, 0, 1);
118
119
0
  return fastpos_result(dist, 0, 2);
120
3.67k
}
121
122
123
#ifdef FULL_DISTANCES_BITS
124
static inline uint32_t
125
get_dist_slot_2(uint32_t dist)
126
1.37M
{
127
1.37M
  assert(dist >= FULL_DISTANCES);
128
129
1.37M
  if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
130
1.37M
    return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
131
132
0
  if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
133
0
    return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
134
135
0
  return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
136
0
}
Unexecuted instantiation: lzma_encoder.c:get_dist_slot_2
lzma_encoder_optimum_normal.c:get_dist_slot_2
Line
Count
Source
126
1.37M
{
127
1.37M
  assert(dist >= FULL_DISTANCES);
128
129
1.37M
  if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0))
130
1.37M
    return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0);
131
132
0
  if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1))
133
0
    return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1);
134
135
0
  return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2);
136
0
}
137
#endif
138
139
#endif
140
141
#endif