/src/harfbuzz/src/hb-set-digest.hh
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright © 2012 Google, Inc. |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #ifndef HB_SET_DIGEST_HH |
28 | | #define HB_SET_DIGEST_HH |
29 | | |
30 | | #include "hb.hh" |
31 | | #include "hb-machinery.hh" |
32 | | |
33 | | /* |
34 | | * The set-digests implement "filters" that support "approximate |
35 | | * member query". Conceptually these are like Bloom Filter and |
36 | | * Quotient Filter, however, much smaller, faster, and designed |
37 | | * to fit the requirements of our uses for glyph coverage queries. |
38 | | * |
39 | | * Our filters are highly accurate if the lookup covers fairly local |
40 | | * set of glyphs, but fully flooded and ineffective if coverage is |
41 | | * all over the place. |
42 | | * |
43 | | * The way these are used is that the filter is first populated by |
44 | | * a lookup's or subtable's Coverage table(s), and then when we |
45 | | * want to apply the lookup or subtable to a glyph, before trying |
46 | | * to apply, we ask the filter if the glyph may be covered. If it's |
47 | | * not, we return early. We can also match a digest against another |
48 | | * digest. |
49 | | * |
50 | | * We use these filters at three levels: |
51 | | * - If the digest for all the glyphs in the buffer as a whole |
52 | | * does not match the digest for the lookup, skip the lookup. |
53 | | * - For each glyph, if it doesn't match the lookup digest, |
54 | | * skip it. |
55 | | * - For each glyph, if it doesn't match the subtable digest, |
56 | | * skip it. |
57 | | * |
58 | | * The main filter we use is a combination of four bits-pattern |
59 | | * filters. A bits-pattern filter checks a number of bits (5 or 6) |
60 | | * of the input number (glyph-id in this case) and checks whether |
61 | | * its pattern is amongst the patterns of any of the accepted values. |
62 | | * The accepted patterns are represented as a "long" integer. The |
63 | | * check is done using four bitwise operations only. |
64 | | */ |
65 | | |
66 | | static constexpr unsigned hb_set_digest_shifts[] = {4, 0, 6}; |
67 | | |
68 | | struct hb_set_digest_t |
69 | | { |
70 | | // No science in these. Intuition and testing only. |
71 | | using mask_t = uint64_t; |
72 | | |
73 | | static constexpr unsigned n = ARRAY_LENGTH_CONST (hb_set_digest_shifts); |
74 | | static constexpr unsigned mask_bytes = sizeof (mask_t); |
75 | | static constexpr unsigned mask_bits = sizeof (mask_t) * 8; |
76 | | static constexpr hb_codepoint_t mb1 = mask_bits - 1; |
77 | | static constexpr mask_t one = 1; |
78 | | static constexpr mask_t all = (mask_t) -1; |
79 | | |
80 | | void init () |
81 | 1.61M | { for (unsigned i = 0; i < n; i++) masks[i] = 0; } |
82 | | |
83 | 402k | void clear () { init (); } |
84 | | |
85 | | static hb_set_digest_t full () |
86 | 0 | { |
87 | 0 | hb_set_digest_t d; |
88 | 0 | for (unsigned i = 0; i < n; i++) d.masks[i] = all; |
89 | 0 | return d; |
90 | 0 | } |
91 | | |
92 | | void union_ (const hb_set_digest_t &o) |
93 | 1.60k | { for (unsigned i = 0; i < n; i++) masks[i] |= o.masks[i]; } |
94 | | |
95 | | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
96 | 993 | { |
97 | 993 | bool ret; |
98 | | |
99 | 993 | ret = false; |
100 | 3.97k | for (unsigned i = 0; i < n; i++) |
101 | 2.97k | if (masks[i] != all) |
102 | 2.97k | ret = true; |
103 | 993 | if (!ret) return false; |
104 | | |
105 | 993 | ret = false; |
106 | 3.97k | for (unsigned i = 0; i < n; i++) |
107 | 2.97k | { |
108 | 2.97k | mask_t shift = hb_set_digest_shifts[i]; |
109 | 2.97k | if ((b >> shift) - (a >> shift) >= mb1) |
110 | 5 | masks[i] = all; |
111 | 2.97k | else |
112 | 2.97k | { |
113 | 2.97k | mask_t ma = one << ((a >> shift) & mb1); |
114 | 2.97k | mask_t mb = one << ((b >> shift) & mb1); |
115 | 2.97k | masks[i] |= mb + (mb - ma) - (mb < ma); |
116 | 2.97k | ret = true; |
117 | 2.97k | } |
118 | 2.97k | } |
119 | 993 | return ret; |
120 | 993 | } |
121 | | |
122 | | template <typename T> |
123 | | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
124 | 402k | { |
125 | 4.41M | for (unsigned int i = 0; i < count; i++) |
126 | 4.01M | { |
127 | 4.01M | add (*array); |
128 | 4.01M | array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); |
129 | 4.01M | } |
130 | 402k | } void hb_set_digest_t::add_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 124 | 184 | { | 125 | 911 | for (unsigned int i = 0; i < count; i++) | 126 | 727 | { | 127 | 727 | add (*array); | 128 | 727 | array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); | 129 | 727 | } | 130 | 184 | } |
void hb_set_digest_t::add_array<unsigned int>(unsigned int const*, unsigned int, unsigned int) Line | Count | Source | 124 | 402k | { | 125 | 4.41M | for (unsigned int i = 0; i < count; i++) | 126 | 4.01M | { | 127 | 4.01M | add (*array); | 128 | 4.01M | array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); | 129 | 4.01M | } | 130 | 402k | } |
|
131 | | template <typename T> |
132 | | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
133 | | template <typename T> |
134 | | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
135 | 184 | { |
136 | 184 | add_array (array, count, stride); |
137 | 184 | return true; |
138 | 184 | } |
139 | | template <typename T> |
140 | 184 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
141 | | |
142 | | bool operator [] (hb_codepoint_t g) const |
143 | 0 | { return may_have (g); } |
144 | | |
145 | | |
146 | | void add (hb_codepoint_t g) |
147 | 4.01M | { |
148 | 16.0M | for (unsigned i = 0; i < n; i++) |
149 | 12.0M | masks[i] |= one << ((g >> hb_set_digest_shifts[i]) & mb1); |
150 | 4.01M | } |
151 | | |
152 | | HB_ALWAYS_INLINE |
153 | | bool may_have (hb_codepoint_t g) const |
154 | 657 | { |
155 | 1.77k | for (unsigned i = 0; i < n; i++) |
156 | 1.40k | if (!(masks[i] & (one << ((g >> hb_set_digest_shifts[i]) & mb1)))) |
157 | 285 | return false; |
158 | 372 | return true; |
159 | 657 | } |
160 | | |
161 | | bool may_intersect (const hb_set_digest_t &o) const |
162 | 11.7k | { |
163 | 12.9k | for (unsigned i = 0; i < n; i++) |
164 | 12.6k | if (!(masks[i] & o.masks[i])) |
165 | 11.4k | return false; |
166 | 315 | return true; |
167 | 11.7k | } |
168 | | |
169 | | private: |
170 | | |
171 | | mask_t masks[n] = {}; |
172 | | }; |
173 | | |
174 | | |
175 | | #endif /* HB_SET_DIGEST_HH */ |