/work/workdir/UnpackedTarball/harfbuzz/src/hb-cff-specializer.hh
Line | Count | Source |
1 | | /* |
2 | | * CFF CharString Specializer |
3 | | * |
4 | | * Optimizes CharString bytecode by using specialized operators |
5 | | * (hlineto, vlineto, hhcurveto, etc.) to save bytes and respects |
6 | | * CFF1 stack limit (48 values). |
7 | | * |
8 | | * Based on fontTools.cffLib.specializer |
9 | | */ |
10 | | |
11 | | #ifndef HB_CFF_SPECIALIZER_HH |
12 | | #define HB_CFF_SPECIALIZER_HH |
13 | | |
14 | | #include "hb.hh" |
15 | | #include "hb-cff-interp-cs-common.hh" |
16 | | |
17 | | namespace CFF { |
18 | | |
19 | | /* CharString command representation - forward declared in hb-subset-cff-common.hh */ |
20 | | |
21 | | /* Check if a value is effectively zero */ |
22 | | static inline bool |
23 | | is_zero (const number_t &n) |
24 | 0 | { |
25 | 0 | return n.to_int () == 0; |
26 | 0 | } |
27 | | |
28 | | /* Generalize CharString commands to canonical form |
29 | | * |
30 | | * Converts all operators to their general forms and breaks down |
31 | | * multi-segment operators into single segments. This ensures we |
32 | | * start from a clean baseline before specialization. |
33 | | * |
34 | | * Based on fontTools.cffLib.specializer.generalizeCommands |
35 | | */ |
36 | | static void |
37 | | generalize_commands (hb_vector_t<cs_command_t> &commands) |
38 | 0 | { |
39 | 0 | hb_vector_t<cs_command_t> result; |
40 | 0 | result.alloc (commands.length * 2); /* Estimate: might expand */ |
41 | |
|
42 | 0 | for (unsigned i = 0; i < commands.length; i++) |
43 | 0 | { |
44 | 0 | auto &cmd = commands[i]; |
45 | |
|
46 | 0 | switch (cmd.op) |
47 | 0 | { |
48 | 0 | case OpCode_hmoveto: |
49 | 0 | case OpCode_vmoveto: |
50 | 0 | { |
51 | | /* Convert to rmoveto with explicit dx,dy */ |
52 | 0 | cs_command_t gen (OpCode_rmoveto); |
53 | 0 | gen.args.alloc (2); |
54 | |
|
55 | 0 | if (cmd.op == OpCode_hmoveto && cmd.args.length >= 1) |
56 | 0 | { |
57 | 0 | gen.args.push (cmd.args[0]); /* dx */ |
58 | 0 | number_t zero; zero.set_int (0); |
59 | 0 | gen.args.push (zero); /* dy = 0 */ |
60 | 0 | } |
61 | 0 | else if (cmd.op == OpCode_vmoveto && cmd.args.length >= 1) |
62 | 0 | { |
63 | 0 | number_t zero; zero.set_int (0); |
64 | 0 | gen.args.push (zero); /* dx = 0 */ |
65 | 0 | gen.args.push (cmd.args[0]); /* dy */ |
66 | 0 | } |
67 | 0 | result.push (gen); |
68 | 0 | break; |
69 | 0 | } |
70 | | |
71 | 0 | case OpCode_hlineto: |
72 | 0 | case OpCode_vlineto: |
73 | 0 | { |
74 | | /* Convert h/v lineto to rlineto, breaking into single segments |
75 | | * hlineto alternates: dx1 (→ dx1,0) dy1 (→ 0,dy1) dx2 (→ dx2,0) ... |
76 | | * vlineto alternates: dy1 (→ 0,dy1) dx1 (→ dx1,0) dy2 (→ 0,dy2) ... */ |
77 | 0 | bool is_h = (cmd.op == OpCode_hlineto); |
78 | 0 | number_t zero; zero.set_int (0); |
79 | |
|
80 | 0 | for (unsigned j = 0; j < cmd.args.length; j++) |
81 | 0 | { |
82 | 0 | cs_command_t seg (OpCode_rlineto); |
83 | 0 | seg.args.alloc (2); |
84 | |
|
85 | 0 | bool is_horizontal = is_h ? (j % 2 == 0) : (j % 2 == 1); |
86 | 0 | if (is_horizontal) |
87 | 0 | { |
88 | 0 | seg.args.push (cmd.args[j]); /* dx */ |
89 | 0 | seg.args.push (zero); /* dy = 0 */ |
90 | 0 | } |
91 | 0 | else |
92 | 0 | { |
93 | 0 | seg.args.push (zero); /* dx = 0 */ |
94 | 0 | seg.args.push (cmd.args[j]); /* dy */ |
95 | 0 | } |
96 | 0 | result.push (seg); |
97 | 0 | } |
98 | 0 | break; |
99 | 0 | } |
100 | | |
101 | 0 | case OpCode_rlineto: |
102 | 0 | { |
103 | | /* Break into single segments (dx,dy pairs) */ |
104 | 0 | for (unsigned j = 0; j + 1 < cmd.args.length; j += 2) |
105 | 0 | { |
106 | 0 | cs_command_t seg (OpCode_rlineto); |
107 | 0 | seg.args.alloc (2); |
108 | 0 | seg.args.push (cmd.args[j]); |
109 | 0 | seg.args.push (cmd.args[j + 1]); |
110 | 0 | result.push (seg); |
111 | 0 | } |
112 | 0 | break; |
113 | 0 | } |
114 | | |
115 | 0 | case OpCode_rrcurveto: |
116 | 0 | { |
117 | | /* Break into single segments (6 args each) */ |
118 | 0 | for (unsigned j = 0; j + 5 < cmd.args.length; j += 6) |
119 | 0 | { |
120 | 0 | cs_command_t seg (OpCode_rrcurveto); |
121 | 0 | seg.args.alloc (6); |
122 | 0 | for (unsigned k = 0; k < 6; k++) |
123 | 0 | seg.args.push (cmd.args[j + k]); |
124 | 0 | result.push (seg); |
125 | 0 | } |
126 | 0 | break; |
127 | 0 | } |
128 | | |
129 | 0 | default: |
130 | | /* Keep other operators as-is */ |
131 | 0 | result.push (cmd); |
132 | 0 | break; |
133 | 0 | } |
134 | 0 | } |
135 | | |
136 | | /* Replace commands with generalized result */ |
137 | 0 | commands.resize (0); |
138 | 0 | for (unsigned i = 0; i < result.length; i++) |
139 | 0 | commands.push (result[i]); |
140 | 0 | } |
141 | | |
142 | | /* Specialize CharString commands to optimize bytecode size |
143 | | * |
144 | | * Follows fontTools approach: |
145 | | * 0. Generalize: Break down to canonical single-segment form |
146 | | * 1. Specialize: Convert rmoveto/rlineto to h/v variants when dx or dy is zero |
147 | | * 2. Combine: Merge adjacent compatible operators |
148 | | * 3. Enforce: Respect maxstack limit (default 48 for CFF1) |
149 | | * |
150 | | * This ensures we never exceed stack depth while optimizing bytecode. |
151 | | */ |
152 | | static void |
153 | | specialize_commands (hb_vector_t<cs_command_t> &commands, |
154 | | unsigned maxstack = 48) |
155 | 0 | { |
156 | 0 | if (commands.length == 0) return; |
157 | | |
158 | | /* Pass 0: Generalize to canonical form (fontTools does this first) */ |
159 | 0 | generalize_commands (commands); |
160 | | |
161 | | /* Pass 1: Specialize rmoveto/rlineto into h/v variants */ |
162 | 0 | for (unsigned i = 0; i < commands.length; i++) |
163 | 0 | { |
164 | 0 | auto &cmd = commands[i]; |
165 | |
|
166 | 0 | if ((cmd.op == OpCode_rmoveto || cmd.op == OpCode_rlineto) && |
167 | 0 | cmd.args.length == 2) |
168 | 0 | { |
169 | 0 | bool dx_zero = is_zero (cmd.args[0]); |
170 | 0 | bool dy_zero = is_zero (cmd.args[1]); |
171 | |
|
172 | 0 | if (dx_zero && !dy_zero) |
173 | 0 | { |
174 | | /* Vertical movement (dx=0): keep only dy */ |
175 | 0 | cmd.op = (cmd.op == OpCode_rmoveto) ? OpCode_vmoveto : OpCode_vlineto; |
176 | | /* Shift dy to position 0 */ |
177 | 0 | cmd.args[0] = cmd.args[1]; |
178 | 0 | cmd.args.resize (1); |
179 | 0 | } |
180 | 0 | else if (!dx_zero && dy_zero) |
181 | 0 | { |
182 | | /* Horizontal movement (dy=0): keep only dx */ |
183 | 0 | cmd.op = (cmd.op == OpCode_rmoveto) ? OpCode_hmoveto : OpCode_hlineto; |
184 | 0 | cmd.args.resize (1); /* Keep only dx */ |
185 | 0 | } |
186 | | /* else: both zero or both non-zero, keep as rmoveto/rlineto */ |
187 | 0 | } |
188 | 0 | } |
189 | | |
190 | | /* Pass 2: Combine adjacent hlineto/vlineto operators |
191 | | * hlineto can take multiple args alternating with vlineto |
192 | | * This saves operator bytes */ |
193 | 0 | for (int i = (int)commands.length - 1; i > 0; i--) |
194 | 0 | { |
195 | 0 | auto &cmd = commands[i]; |
196 | 0 | auto &prev = commands[i-1]; |
197 | | |
198 | | /* Combine adjacent hlineto + vlineto or vlineto + hlineto */ |
199 | 0 | if ((prev.op == OpCode_hlineto && cmd.op == OpCode_vlineto) || |
200 | 0 | (prev.op == OpCode_vlineto && cmd.op == OpCode_hlineto)) |
201 | 0 | { |
202 | | /* Check stack depth */ |
203 | 0 | unsigned combined_args = prev.args.length + cmd.args.length; |
204 | 0 | if (combined_args < maxstack) |
205 | 0 | { |
206 | | /* Merge into first command, keep its operator */ |
207 | 0 | for (unsigned j = 0; j < cmd.args.length; j++) |
208 | 0 | prev.args.push (cmd.args[j]); |
209 | 0 | commands.remove_ordered (i); |
210 | 0 | i++; /* Adjust for removed element */ |
211 | 0 | } |
212 | 0 | } |
213 | 0 | } |
214 | | |
215 | | /* Pass 3: Combine adjacent identical operators */ |
216 | 0 | for (int i = (int)commands.length - 1; i > 0; i--) |
217 | 0 | { |
218 | 0 | auto &cmd = commands[i]; |
219 | 0 | auto &prev = commands[i-1]; |
220 | | |
221 | | /* Combine same operators (e.g., rlineto + rlineto) */ |
222 | 0 | if (prev.op == cmd.op && |
223 | 0 | (cmd.op == OpCode_rlineto || cmd.op == OpCode_hlineto || |
224 | 0 | cmd.op == OpCode_vlineto || cmd.op == OpCode_rrcurveto)) |
225 | 0 | { |
226 | | /* Check stack depth */ |
227 | 0 | unsigned combined_args = prev.args.length + cmd.args.length; |
228 | 0 | if (combined_args < maxstack) |
229 | 0 | { |
230 | | /* Merge args */ |
231 | 0 | for (unsigned j = 0; j < cmd.args.length; j++) |
232 | 0 | prev.args.push (cmd.args[j]); |
233 | 0 | commands.remove_ordered (i); |
234 | 0 | i++; /* Adjust for removed element */ |
235 | 0 | } |
236 | 0 | } |
237 | 0 | } |
238 | 0 | } |
239 | | |
240 | | /* Encode commands back to binary CharString */ |
241 | | static bool |
242 | | encode_commands (const hb_vector_t<cs_command_t> &commands, |
243 | | str_buff_t &output) |
244 | 0 | { |
245 | 0 | for (const auto &cmd : commands) |
246 | 0 | { |
247 | 0 | str_encoder_t encoder (output); |
248 | | |
249 | | /* Encode arguments */ |
250 | 0 | for (const auto &arg : cmd.args) |
251 | 0 | encoder.encode_num_cs (arg); |
252 | | |
253 | | /* Encode operator */ |
254 | 0 | if (cmd.op != OpCode_Invalid) |
255 | 0 | encoder.encode_op (cmd.op); |
256 | | |
257 | | /* hintmask/cntrmask are followed by raw mask bytes. */ |
258 | 0 | if (cmd.op == OpCode_hintmask || cmd.op == OpCode_cntrmask) |
259 | 0 | { |
260 | 0 | for (const auto &byte : cmd.mask_bytes) |
261 | 0 | encoder.encode_byte (byte); |
262 | 0 | } |
263 | |
|
264 | 0 | if (encoder.in_error ()) |
265 | 0 | return false; |
266 | 0 | } |
267 | | |
268 | 0 | return true; |
269 | 0 | } |
270 | | |
271 | | } /* namespace CFF */ |
272 | | |
273 | | #endif /* HB_CFF_SPECIALIZER_HH */ |