/src/harfbuzz/src/hb-set-digest.hh
Line | Count | Source |
1 | | /* |
2 | | * Copyright © 2012 Google, Inc. |
3 | | * |
4 | | * This is part of HarfBuzz, a text shaping library. |
5 | | * |
6 | | * Permission is hereby granted, without written agreement and without |
7 | | * license or royalty fees, to use, copy, modify, and distribute this |
8 | | * software and its documentation for any purpose, provided that the |
9 | | * above copyright notice and the following two paragraphs appear in |
10 | | * all copies of this software. |
11 | | * |
12 | | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | | * DAMAGE. |
17 | | * |
18 | | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | | * |
24 | | * Google Author(s): Behdad Esfahbod |
25 | | */ |
26 | | |
27 | | #ifndef HB_SET_DIGEST_HH |
28 | | #define HB_SET_DIGEST_HH |
29 | | |
30 | | #include "hb.hh" |
31 | | |
32 | | /* |
33 | | * The set-digests here implement various "filters" that support |
34 | | * "approximate member query". Conceptually these are like Bloom |
35 | | * Filter and Quotient Filter, however, much smaller, faster, and |
36 | | * designed to fit the requirements of our uses for glyph coverage |
37 | | * queries. |
38 | | * |
39 | | * Our filters are highly accurate if the lookup covers fairly local |
40 | | * set of glyphs, but fully flooded and ineffective if coverage is |
41 | | * all over the place. |
42 | | * |
43 | | * The way these are used is that the filter is first populated by |
44 | | * a lookup's or subtable's Coverage table(s), and then when we |
45 | | * want to apply the lookup or subtable to a glyph, before trying |
46 | | * to apply, we ask the filter if the glyph may be covered. If it's |
47 | | * not, we return early. |
48 | | * |
49 | | * We use these filters both at the lookup-level, and then again, |
50 | | * at the subtable-level. Both have performance win. |
51 | | * |
52 | | * The main filter we use is a combination of three bits-pattern |
53 | | * filters. A bits-pattern filter checks a number of bits (5 or 6) |
54 | | * of the input number (glyph-id in this case) and checks whether |
55 | | * its pattern is amongst the patterns of any of the accepted values. |
56 | | * The accepted patterns are represented as a "long" integer. The |
57 | | * check is done using four bitwise operations only. |
58 | | */ |
59 | | |
60 | | template <typename mask_t, unsigned int shift> |
61 | | struct hb_set_digest_bits_pattern_t |
62 | | { |
63 | | static constexpr unsigned mask_bytes = sizeof (mask_t); |
64 | | static constexpr unsigned mask_bits = sizeof (mask_t) * 8; |
65 | | static constexpr unsigned num_bits = 0 |
66 | | + (mask_bytes >= 1 ? 3 : 0) |
67 | | + (mask_bytes >= 2 ? 1 : 0) |
68 | | + (mask_bytes >= 4 ? 1 : 0) |
69 | | + (mask_bytes >= 8 ? 1 : 0) |
70 | | + (mask_bytes >= 16? 1 : 0) |
71 | | + 0; |
72 | | |
73 | | static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); |
74 | | static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); |
75 | | |
76 | 22.9M | void init () { mask = 0; } hb_set_digest_bits_pattern_t<unsigned long, 4u>::init() Line | Count | Source | 76 | 7.64M | void init () { mask = 0; } |
hb_set_digest_bits_pattern_t<unsigned long, 0u>::init() Line | Count | Source | 76 | 7.64M | void init () { mask = 0; } |
hb_set_digest_bits_pattern_t<unsigned long, 9u>::init() Line | Count | Source | 76 | 7.64M | void init () { mask = 0; } |
|
77 | | |
78 | 356M | void add (hb_codepoint_t g) { mask |= mask_for (g); } hb_set_digest_bits_pattern_t<unsigned long, 4u>::add(unsigned int) Line | Count | Source | 78 | 118M | void add (hb_codepoint_t g) { mask |= mask_for (g); } |
hb_set_digest_bits_pattern_t<unsigned long, 0u>::add(unsigned int) Line | Count | Source | 78 | 118M | void add (hb_codepoint_t g) { mask |= mask_for (g); } |
hb_set_digest_bits_pattern_t<unsigned long, 9u>::add(unsigned int) Line | Count | Source | 78 | 118M | void add (hb_codepoint_t g) { mask |= mask_for (g); } |
|
79 | | |
80 | | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
81 | 51.7M | { |
82 | 51.7M | if ((b >> shift) - (a >> shift) >= mask_bits - 1) |
83 | 15.3M | mask = (mask_t) -1; |
84 | 36.3M | else { |
85 | 36.3M | mask_t ma = mask_for (a); |
86 | 36.3M | mask_t mb = mask_for (b); |
87 | 36.3M | mask |= mb + (mb - ma) - (mb < ma); |
88 | 36.3M | } |
89 | 51.7M | return true; |
90 | 51.7M | } hb_set_digest_bits_pattern_t<unsigned long, 4u>::add_range(unsigned int, unsigned int) Line | Count | Source | 81 | 17.2M | { | 82 | 17.2M | if ((b >> shift) - (a >> shift) >= mask_bits - 1) | 83 | 5.48M | mask = (mask_t) -1; | 84 | 11.7M | else { | 85 | 11.7M | mask_t ma = mask_for (a); | 86 | 11.7M | mask_t mb = mask_for (b); | 87 | 11.7M | mask |= mb + (mb - ma) - (mb < ma); | 88 | 11.7M | } | 89 | 17.2M | return true; | 90 | 17.2M | } |
hb_set_digest_bits_pattern_t<unsigned long, 0u>::add_range(unsigned int, unsigned int) Line | Count | Source | 81 | 17.2M | { | 82 | 17.2M | if ((b >> shift) - (a >> shift) >= mask_bits - 1) | 83 | 6.44M | mask = (mask_t) -1; | 84 | 10.8M | else { | 85 | 10.8M | mask_t ma = mask_for (a); | 86 | 10.8M | mask_t mb = mask_for (b); | 87 | 10.8M | mask |= mb + (mb - ma) - (mb < ma); | 88 | 10.8M | } | 89 | 17.2M | return true; | 90 | 17.2M | } |
hb_set_digest_bits_pattern_t<unsigned long, 9u>::add_range(unsigned int, unsigned int) Line | Count | Source | 81 | 17.2M | { | 82 | 17.2M | if ((b >> shift) - (a >> shift) >= mask_bits - 1) | 83 | 3.43M | mask = (mask_t) -1; | 84 | 13.8M | else { | 85 | 13.8M | mask_t ma = mask_for (a); | 86 | 13.8M | mask_t mb = mask_for (b); | 87 | 13.8M | mask |= mb + (mb - ma) - (mb < ma); | 88 | 13.8M | } | 89 | 17.2M | return true; | 90 | 17.2M | } |
|
91 | | |
92 | | template <typename T> |
93 | | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
94 | | { |
95 | | for (unsigned int i = 0; i < count; i++) |
96 | | { |
97 | | add (*array); |
98 | | array = (const T *) (stride + (const char *) array); |
99 | | } |
100 | | } |
101 | | template <typename T> |
102 | | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
103 | | template <typename T> |
104 | | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
105 | 1.91M | { |
106 | 358M | for (unsigned int i = 0; i < count; i++) |
107 | 356M | { |
108 | 356M | add (*array); |
109 | 356M | array = (const T *) (stride + (const char *) array); |
110 | 356M | } |
111 | 1.91M | return true; |
112 | 1.91M | } bool hb_set_digest_bits_pattern_t<unsigned long, 4u>::add_sorted_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 597k | { | 106 | 112M | for (unsigned int i = 0; i < count; i++) | 107 | 112M | { | 108 | 112M | add (*array); | 109 | 112M | array = (const T *) (stride + (const char *) array); | 110 | 112M | } | 111 | 597k | return true; | 112 | 597k | } |
bool hb_set_digest_bits_pattern_t<unsigned long, 0u>::add_sorted_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 597k | { | 106 | 112M | for (unsigned int i = 0; i < count; i++) | 107 | 112M | { | 108 | 112M | add (*array); | 109 | 112M | array = (const T *) (stride + (const char *) array); | 110 | 112M | } | 111 | 597k | return true; | 112 | 597k | } |
bool hb_set_digest_bits_pattern_t<unsigned long, 9u>::add_sorted_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 597k | { | 106 | 112M | for (unsigned int i = 0; i < count; i++) | 107 | 112M | { | 108 | 112M | add (*array); | 109 | 112M | array = (const T *) (stride + (const char *) array); | 110 | 112M | } | 111 | 597k | return true; | 112 | 597k | } |
bool hb_set_digest_bits_pattern_t<unsigned long, 4u>::add_sorted_array<OT::HBGlyphID24>(OT::HBGlyphID24 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 39.5k | { | 106 | 6.56M | for (unsigned int i = 0; i < count; i++) | 107 | 6.52M | { | 108 | 6.52M | add (*array); | 109 | 6.52M | array = (const T *) (stride + (const char *) array); | 110 | 6.52M | } | 111 | 39.5k | return true; | 112 | 39.5k | } |
bool hb_set_digest_bits_pattern_t<unsigned long, 0u>::add_sorted_array<OT::HBGlyphID24>(OT::HBGlyphID24 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 39.5k | { | 106 | 6.56M | for (unsigned int i = 0; i < count; i++) | 107 | 6.52M | { | 108 | 6.52M | add (*array); | 109 | 6.52M | array = (const T *) (stride + (const char *) array); | 110 | 6.52M | } | 111 | 39.5k | return true; | 112 | 39.5k | } |
bool hb_set_digest_bits_pattern_t<unsigned long, 9u>::add_sorted_array<OT::HBGlyphID24>(OT::HBGlyphID24 const*, unsigned int, unsigned int) Line | Count | Source | 105 | 39.5k | { | 106 | 6.56M | for (unsigned int i = 0; i < count; i++) | 107 | 6.52M | { | 108 | 6.52M | add (*array); | 109 | 6.52M | array = (const T *) (stride + (const char *) array); | 110 | 6.52M | } | 111 | 39.5k | return true; | 112 | 39.5k | } |
|
113 | | template <typename T> |
114 | | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
115 | | |
116 | | bool may_have (hb_codepoint_t g) const |
117 | 242M | { return mask & mask_for (g); } hb_set_digest_bits_pattern_t<unsigned long, 4u>::may_have(unsigned int) const Line | Count | Source | 117 | 135M | { return mask & mask_for (g); } |
hb_set_digest_bits_pattern_t<unsigned long, 0u>::may_have(unsigned int) const Line | Count | Source | 117 | 57.6M | { return mask & mask_for (g); } |
hb_set_digest_bits_pattern_t<unsigned long, 9u>::may_have(unsigned int) const Line | Count | Source | 117 | 49.2M | { return mask & mask_for (g); } |
|
118 | | |
119 | | private: |
120 | | |
121 | | static mask_t mask_for (hb_codepoint_t g) |
122 | 671M | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } hb_set_digest_bits_pattern_t<unsigned long, 4u>::mask_for(unsigned int) Line | Count | Source | 122 | 278M | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } |
hb_set_digest_bits_pattern_t<unsigned long, 0u>::mask_for(unsigned int) Line | Count | Source | 122 | 198M | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } |
hb_set_digest_bits_pattern_t<unsigned long, 9u>::mask_for(unsigned int) Line | Count | Source | 122 | 195M | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } |
|
123 | | mask_t mask; |
124 | | }; |
125 | | |
126 | | template <typename head_t, typename tail_t> |
127 | | struct hb_set_digest_combiner_t |
128 | | { |
129 | | void init () |
130 | 15.2M | { |
131 | 15.2M | head.init (); |
132 | 15.2M | tail.init (); |
133 | 15.2M | } hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::init() Line | Count | Source | 130 | 7.64M | { | 131 | 7.64M | head.init (); | 132 | 7.64M | tail.init (); | 133 | 7.64M | } |
hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> >::init() Line | Count | Source | 130 | 7.64M | { | 131 | 7.64M | head.init (); | 132 | 7.64M | tail.init (); | 133 | 7.64M | } |
|
134 | | |
135 | | void add (hb_codepoint_t g) |
136 | | { |
137 | | head.add (g); |
138 | | tail.add (g); |
139 | | } |
140 | | |
141 | | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
142 | 34.4M | { |
143 | 34.4M | head.add_range (a, b); |
144 | 34.4M | tail.add_range (a, b); |
145 | 34.4M | return true; |
146 | 34.4M | } hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::add_range(unsigned int, unsigned int) Line | Count | Source | 142 | 17.2M | { | 143 | 17.2M | head.add_range (a, b); | 144 | 17.2M | tail.add_range (a, b); | 145 | 17.2M | return true; | 146 | 17.2M | } |
hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> >::add_range(unsigned int, unsigned int) Line | Count | Source | 142 | 17.2M | { | 143 | 17.2M | head.add_range (a, b); | 144 | 17.2M | tail.add_range (a, b); | 145 | 17.2M | return true; | 146 | 17.2M | } |
|
147 | | template <typename T> |
148 | | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
149 | | { |
150 | | head.add_array (array, count, stride); |
151 | | tail.add_array (array, count, stride); |
152 | | } |
153 | | template <typename T> |
154 | | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
155 | | template <typename T> |
156 | | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
157 | 1.27M | { |
158 | 1.27M | head.add_sorted_array (array, count, stride); |
159 | 1.27M | tail.add_sorted_array (array, count, stride); |
160 | 1.27M | return true; |
161 | 1.27M | } bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::add_sorted_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 157 | 597k | { | 158 | 597k | head.add_sorted_array (array, count, stride); | 159 | 597k | tail.add_sorted_array (array, count, stride); | 160 | 597k | return true; | 161 | 597k | } |
bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> >::add_sorted_array<OT::HBGlyphID16>(OT::HBGlyphID16 const*, unsigned int, unsigned int) Line | Count | Source | 157 | 597k | { | 158 | 597k | head.add_sorted_array (array, count, stride); | 159 | 597k | tail.add_sorted_array (array, count, stride); | 160 | 597k | return true; | 161 | 597k | } |
bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::add_sorted_array<OT::HBGlyphID24>(OT::HBGlyphID24 const*, unsigned int, unsigned int) Line | Count | Source | 157 | 39.5k | { | 158 | 39.5k | head.add_sorted_array (array, count, stride); | 159 | 39.5k | tail.add_sorted_array (array, count, stride); | 160 | 39.5k | return true; | 161 | 39.5k | } |
bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> >::add_sorted_array<OT::HBGlyphID24>(OT::HBGlyphID24 const*, unsigned int, unsigned int) Line | Count | Source | 157 | 39.5k | { | 158 | 39.5k | head.add_sorted_array (array, count, stride); | 159 | 39.5k | tail.add_sorted_array (array, count, stride); | 160 | 39.5k | return true; | 161 | 39.5k | } |
|
162 | | template <typename T> |
163 | 637k | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::add_sorted_array<OT::HBGlyphID16>(hb_sorted_array_t<OT::HBGlyphID16 const> const&) Line | Count | Source | 163 | 597k | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
bool hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::add_sorted_array<OT::HBGlyphID24>(hb_sorted_array_t<OT::HBGlyphID24 const> const&) Line | Count | Source | 163 | 39.5k | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
|
164 | | |
165 | | bool may_have (hb_codepoint_t g) const |
166 | 193M | { |
167 | 193M | return head.may_have (g) && tail.may_have (g); |
168 | 193M | } hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 4u>, hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> > >::may_have(unsigned int) const Line | Count | Source | 166 | 135M | { | 167 | 135M | return head.may_have (g) && tail.may_have (g); | 168 | 135M | } |
hb_set_digest_combiner_t<hb_set_digest_bits_pattern_t<unsigned long, 0u>, hb_set_digest_bits_pattern_t<unsigned long, 9u> >::may_have(unsigned int) const Line | Count | Source | 166 | 57.6M | { | 167 | 57.6M | return head.may_have (g) && tail.may_have (g); | 168 | 57.6M | } |
|
169 | | |
170 | | private: |
171 | | head_t head; |
172 | | tail_t tail; |
173 | | }; |
174 | | |
175 | | |
176 | | /* |
177 | | * hb_set_digest_t |
178 | | * |
179 | | * This is a combination of digests that performs "best". |
180 | | * There is not much science to this: it's a result of intuition |
181 | | * and testing. |
182 | | */ |
183 | | using hb_set_digest_t = |
184 | | hb_set_digest_combiner_t |
185 | | < |
186 | | hb_set_digest_bits_pattern_t<unsigned long, 4>, |
187 | | hb_set_digest_combiner_t |
188 | | < |
189 | | hb_set_digest_bits_pattern_t<unsigned long, 0>, |
190 | | hb_set_digest_bits_pattern_t<unsigned long, 9> |
191 | | > |
192 | | > |
193 | | ; |
194 | | |
195 | | |
196 | | #endif /* HB_SET_DIGEST_HH */ |