/src/xz/src/liblzma/lzma/fastpos.h
Line | Count | Source (jump to first uncovered line) |
1 | | // SPDX-License-Identifier: 0BSD |
2 | | |
3 | | /////////////////////////////////////////////////////////////////////////////// |
4 | | // |
5 | | /// \file fastpos.h |
6 | | /// \brief Kind of two-bit version of bit scan reverse |
7 | | /// |
8 | | // Authors: Igor Pavlov |
9 | | // Lasse Collin |
10 | | // |
11 | | /////////////////////////////////////////////////////////////////////////////// |
12 | | |
13 | | #ifndef LZMA_FASTPOS_H |
14 | | #define LZMA_FASTPOS_H |
15 | | |
16 | | // LZMA encodes match distances by storing the highest two bits using |
17 | | // a six-bit value [0, 63], and then the missing lower bits. |
18 | | // Dictionary size is also stored using this encoding in the .xz |
19 | | // file format header. |
20 | | // |
21 | | // fastpos.h provides a way to quickly find out the correct six-bit |
22 | | // values. The following table gives some examples of this encoding: |
23 | | // |
24 | | // dist return |
25 | | // 0 0 |
26 | | // 1 1 |
27 | | // 2 2 |
28 | | // 3 3 |
29 | | // 4 4 |
30 | | // 5 4 |
31 | | // 6 5 |
32 | | // 7 5 |
33 | | // 8 6 |
34 | | // 11 6 |
35 | | // 12 7 |
36 | | // ... ... |
37 | | // 15 7 |
38 | | // 16 8 |
39 | | // 17 8 |
40 | | // ... ... |
41 | | // 23 8 |
42 | | // 24 9 |
43 | | // 25 9 |
44 | | // ... ... |
45 | | // |
46 | | // |
47 | | // Provided functions or macros |
48 | | // ---------------------------- |
49 | | // |
50 | | // get_dist_slot(dist) is the basic version. get_dist_slot_2(dist) |
51 | | // assumes that dist >= FULL_DISTANCES, thus the result is at least |
52 | | // FULL_DISTANCES_BITS * 2. Using get_dist_slot(dist) instead of |
53 | | // get_dist_slot_2(dist) would give the same result, but get_dist_slot_2(dist) |
54 | | // should be tiny bit faster due to the assumption being made. |
55 | | // |
56 | | // |
57 | | // Size vs. speed |
58 | | // -------------- |
59 | | // |
60 | | // With some CPUs that have fast BSR (bit scan reverse) instruction, the |
61 | | // size optimized version is slightly faster than the bigger table based |
62 | | // approach. Such CPUs include Intel Pentium Pro, Pentium II, Pentium III |
63 | | // and Core 2 (possibly others). AMD K7 seems to have slower BSR, but that |
64 | | // would still have speed roughly comparable to the table version. Older |
65 | | // x86 CPUs like the original Pentium have very slow BSR; on those systems |
66 | | // the table version is a lot faster. |
67 | | // |
68 | | // On some CPUs, the table version is a lot faster when using position |
69 | | // dependent code, but with position independent code the size optimized |
70 | | // version is slightly faster. This occurs at least on 32-bit SPARC (no |
71 | | // ASM optimizations). |
72 | | // |
73 | | // I'm making the table version the default, because that has good speed |
74 | | // on all systems I have tried. The size optimized version is sometimes |
75 | | // slightly faster, but sometimes it is a lot slower. |
76 | | |
77 | | #ifdef HAVE_SMALL |
78 | | # define get_dist_slot(dist) \ |
79 | | ((dist) <= 4 ? (dist) : get_dist_slot_2(dist)) |
80 | | |
81 | | static inline uint32_t |
82 | | get_dist_slot_2(uint32_t dist) |
83 | | { |
84 | | const uint32_t i = bsr32(dist); |
85 | | return (i + i) + ((dist >> (i - 1)) & 1); |
86 | | } |
87 | | |
88 | | |
89 | | #else |
90 | | |
91 | 6.23M | #define FASTPOS_BITS 13 |
92 | | |
93 | | lzma_attr_visibility_hidden |
94 | | extern const uint8_t lzma_fastpos[1 << FASTPOS_BITS]; |
95 | | |
96 | | |
97 | | #define fastpos_shift(extra, n) \ |
98 | 4.49M | ((extra) + (n) * (FASTPOS_BITS - 1)) |
99 | | |
100 | | #define fastpos_limit(extra, n) \ |
101 | 1.73M | (UINT32_C(1) << (FASTPOS_BITS + fastpos_shift(extra, n))) |
102 | | |
103 | | #define fastpos_result(dist, extra, n) \ |
104 | 1.38M | (uint32_t)(lzma_fastpos[(dist) >> fastpos_shift(extra, n)]) \ |
105 | 1.38M | + 2 * fastpos_shift(extra, n) |
106 | | |
107 | | |
108 | | static inline uint32_t |
109 | | get_dist_slot(uint32_t dist) |
110 | 354k | { |
111 | | // If it is small enough, we can pick the result directly from |
112 | | // the precalculated table. |
113 | 354k | if (dist < fastpos_limit(0, 0)) |
114 | 350k | return lzma_fastpos[dist]; |
115 | | |
116 | 3.67k | if (dist < fastpos_limit(0, 1)) |
117 | 3.67k | return fastpos_result(dist, 0, 1); |
118 | | |
119 | 0 | return fastpos_result(dist, 0, 2); |
120 | 3.67k | } lzma_encoder.c:get_dist_slot Line | Count | Source | 110 | 54.7k | { | 111 | | // If it is small enough, we can pick the result directly from | 112 | | // the precalculated table. | 113 | 54.7k | if (dist < fastpos_limit(0, 0)) | 114 | 54.7k | return lzma_fastpos[dist]; | 115 | | | 116 | 0 | if (dist < fastpos_limit(0, 1)) | 117 | 0 | return fastpos_result(dist, 0, 1); | 118 | | | 119 | 0 | return fastpos_result(dist, 0, 2); | 120 | 0 | } |
lzma_encoder_optimum_normal.c:get_dist_slot Line | Count | Source | 110 | 295k | { | 111 | | // If it is small enough, we can pick the result directly from | 112 | | // the precalculated table. | 113 | 295k | if (dist < fastpos_limit(0, 0)) | 114 | 295k | return lzma_fastpos[dist]; | 115 | | | 116 | 0 | if (dist < fastpos_limit(0, 1)) | 117 | 0 | return fastpos_result(dist, 0, 1); | 118 | | | 119 | 0 | return fastpos_result(dist, 0, 2); | 120 | 0 | } |
Unexecuted instantiation: fastpos_table.c:get_dist_slot lzma2_encoder.c:get_dist_slot Line | Count | Source | 110 | 3.67k | { | 111 | | // If it is small enough, we can pick the result directly from | 112 | | // the precalculated table. | 113 | 3.67k | if (dist < fastpos_limit(0, 0)) | 114 | 0 | return lzma_fastpos[dist]; | 115 | | | 116 | 3.67k | if (dist < fastpos_limit(0, 1)) | 117 | 3.67k | return fastpos_result(dist, 0, 1); | 118 | | | 119 | 0 | return fastpos_result(dist, 0, 2); | 120 | 3.67k | } |
|
121 | | |
122 | | |
123 | | #ifdef FULL_DISTANCES_BITS |
124 | | static inline uint32_t |
125 | | get_dist_slot_2(uint32_t dist) |
126 | 1.37M | { |
127 | 1.37M | assert(dist >= FULL_DISTANCES); |
128 | | |
129 | 1.37M | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) |
130 | 1.37M | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0); |
131 | | |
132 | 0 | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) |
133 | 0 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1); |
134 | | |
135 | 0 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2); |
136 | 0 | } Unexecuted instantiation: lzma_encoder.c:get_dist_slot_2 lzma_encoder_optimum_normal.c:get_dist_slot_2 Line | Count | Source | 126 | 1.37M | { | 127 | 1.37M | assert(dist >= FULL_DISTANCES); | 128 | | | 129 | 1.37M | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 0)) | 130 | 1.37M | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 0); | 131 | | | 132 | 0 | if (dist < fastpos_limit(FULL_DISTANCES_BITS - 1, 1)) | 133 | 0 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 1); | 134 | | | 135 | 0 | return fastpos_result(dist, FULL_DISTANCES_BITS - 1, 2); | 136 | 0 | } |
|
137 | | #endif |
138 | | |
139 | | #endif |
140 | | |
141 | | #endif |