/src/mercurial/mercurial/mpatch.c
Line | Count | Source |
1 | | /* |
2 | | mpatch.c - efficient binary patching for Mercurial |
3 | | |
4 | | This implements a patch algorithm that's O(m + nlog n) where m is the |
5 | | size of the output and n is the number of patches. |
6 | | |
7 | | Given a list of binary patches, it unpacks each into a hunk list, |
8 | | then combines the hunk lists with a treewise recursion to form a |
9 | | single hunk list. This hunk list is then applied to the original |
10 | | text. |
11 | | |
12 | | The text (or binary) fragments are copied directly from their source |
13 | | Python objects into a preallocated output string to avoid the |
14 | | allocation of intermediate Python objects. Working memory is about 2x |
15 | | the total number of hunks. |
16 | | |
17 | | Copyright 2005, 2006 Olivia Mackall <olivia@selenic.com> |
18 | | |
19 | | This software may be used and distributed according to the terms |
20 | | of the GNU General Public License, incorporated herein by reference. |
21 | | */ |
22 | | |
23 | | #include <limits.h> |
24 | | #include <stdlib.h> |
25 | | #include <string.h> |
26 | | |
27 | | #include "bitmanipulation.h" |
28 | | #include "compat.h" |
29 | | #include "mpatch.h" |
30 | | |
31 | | /* VC9 doesn't include bool and lacks stdbool.h based on cext/util.h */ |
32 | | #if defined(_MSC_VER) || __STDC_VERSION__ < 199901L |
33 | | #define true 1 |
34 | | #define false 0 |
35 | | typedef unsigned char bool; |
36 | | #else |
37 | | #include <stdbool.h> |
38 | | #endif |
39 | | |
40 | | static struct mpatch_flist *lalloc(ssize_t size) |
41 | 27.9k | { |
42 | 27.9k | struct mpatch_flist *a = NULL; |
43 | | |
44 | 27.9k | if (size < 1) { |
45 | 6.44k | size = 1; |
46 | 6.44k | } |
47 | | |
48 | 27.9k | a = (struct mpatch_flist *)malloc(sizeof(struct mpatch_flist)); |
49 | 27.9k | if (a) { |
50 | 27.9k | a->base = (struct mpatch_frag *)malloc( |
51 | 27.9k | sizeof(struct mpatch_frag) * size); |
52 | 27.9k | if (a->base) { |
53 | 27.9k | a->head = a->tail = a->base; |
54 | 27.9k | return a; |
55 | 27.9k | } |
56 | 0 | free(a); |
57 | 0 | } |
58 | 0 | return NULL; |
59 | 27.9k | } |
60 | | |
61 | | void mpatch_lfree(struct mpatch_flist *a) |
62 | 32.4k | { |
63 | 32.4k | if (a) { |
64 | 27.9k | free(a->base); |
65 | 27.9k | free(a); |
66 | 27.9k | } |
67 | 32.4k | } |
68 | | |
69 | | static ssize_t lsize(struct mpatch_flist *a) |
70 | 45.0k | { |
71 | 45.0k | return a->tail - a->head; |
72 | 45.0k | } |
73 | | |
74 | | /* add helper to add src and *dest iff it won't overflow */ |
75 | | static inline bool safeadd(int src, int *dest) |
76 | 1.77M | { |
77 | 1.77M | if ((src > 0) == (*dest > 0)) { |
78 | 1.33M | if (*dest > 0) { |
79 | 347k | if (src > (INT_MAX - *dest)) { |
80 | 1.41k | return false; |
81 | 1.41k | } |
82 | 991k | } else { |
83 | 991k | if (src < (INT_MIN - *dest)) { |
84 | 12.6k | return false; |
85 | 12.6k | } |
86 | 991k | } |
87 | 1.33M | } |
88 | 1.76M | *dest += src; |
89 | 1.76M | return true; |
90 | 1.77M | } |
91 | | |
92 | | /* subtract src from dest and store result in dest */ |
93 | | static inline bool safesub(int src, int *dest) |
94 | 823k | { |
95 | 823k | if (((src > 0) && (*dest < INT_MIN + src)) || |
96 | 823k | ((src < 0) && (*dest > INT_MAX + src))) { |
97 | 1.18k | return false; |
98 | 1.18k | } |
99 | 822k | *dest -= src; |
100 | 822k | return true; |
101 | 823k | } |
102 | | |
103 | | /* move hunks in source that are less cut to dest, compensating |
104 | | for changes in offset. the last hunk may be split if necessary. |
105 | | */ |
106 | | static int gather(struct mpatch_flist *dest, struct mpatch_flist *src, int cut, |
107 | | int offset) |
108 | 367k | { |
109 | 367k | struct mpatch_frag *d = dest->tail, *s = src->head; |
110 | 367k | int postend, c, l; |
111 | | |
112 | 379k | while (s != src->tail) { |
113 | 349k | int soffset = s->start; |
114 | 349k | if (!safeadd(offset, &soffset)) { |
115 | 434 | break; /* add would overflow, oh well */ |
116 | 434 | } |
117 | 348k | if (soffset >= cut) { |
118 | 328k | break; /* we've gone far enough */ |
119 | 328k | } |
120 | | |
121 | 20.1k | postend = offset; |
122 | 20.1k | if (!safeadd(s->start, &postend) || |
123 | 20.1k | !safeadd(s->len, &postend)) { |
124 | 263 | break; |
125 | 263 | } |
126 | 19.9k | if (postend <= cut) { |
127 | | /* save this hunk */ |
128 | 18.7k | int tmp = s->start; |
129 | 18.7k | if (!safesub(s->end, &tmp)) { |
130 | 197 | break; |
131 | 197 | } |
132 | 18.5k | if (!safeadd(s->len, &tmp)) { |
133 | 0 | break; |
134 | 0 | } |
135 | 18.5k | if (!safeadd(tmp, &offset)) { |
136 | 6.07k | break; /* add would overflow, oh well */ |
137 | 6.07k | } |
138 | 12.5k | *d++ = *s++; |
139 | 12.5k | } else { |
140 | | /* break up this hunk */ |
141 | 1.13k | c = cut; |
142 | 1.13k | if (!safesub(offset, &c)) { |
143 | 37 | break; |
144 | 37 | } |
145 | 1.10k | if (s->end < c) { |
146 | 636 | c = s->end; |
147 | 636 | } |
148 | 1.10k | l = cut - offset - s->start; |
149 | 1.10k | if (s->len < l) { |
150 | 0 | l = s->len; |
151 | 0 | } |
152 | | |
153 | 1.10k | offset += s->start + l - c; |
154 | | |
155 | 1.10k | d->start = s->start; |
156 | 1.10k | d->end = c; |
157 | 1.10k | d->len = l; |
158 | 1.10k | d->data = s->data; |
159 | 1.10k | d++; |
160 | 1.10k | s->start = c; |
161 | 1.10k | s->len = s->len - l; |
162 | 1.10k | s->data = s->data + l; |
163 | | |
164 | 1.10k | break; |
165 | 1.13k | } |
166 | 19.9k | } |
167 | | |
168 | 367k | dest->tail = d; |
169 | 367k | src->head = s; |
170 | 367k | return offset; |
171 | 367k | } |
172 | | |
173 | | /* like gather, but with no output list */ |
174 | | static int discard(struct mpatch_flist *src, int cut, int offset) |
175 | 367k | { |
176 | 367k | struct mpatch_frag *s = src->head; |
177 | 367k | int postend, c, l; |
178 | | |
179 | 429k | while (s != src->tail) { |
180 | 398k | int cmpcut = s->start; |
181 | 398k | if (!safeadd(offset, &cmpcut)) { |
182 | 441 | break; |
183 | 441 | } |
184 | 397k | if (cmpcut >= cut) { |
185 | 327k | break; |
186 | 327k | } |
187 | | |
188 | 70.5k | postend = offset; |
189 | 70.5k | if (!safeadd(s->start, &postend)) { |
190 | 0 | break; |
191 | 0 | } |
192 | 70.5k | if (!safeadd(s->len, &postend)) { |
193 | 494 | break; |
194 | 494 | } |
195 | 70.0k | if (postend <= cut) { |
196 | | /* do the subtraction first to avoid UB integer overflow |
197 | | */ |
198 | 68.8k | int tmp = s->start; |
199 | 68.8k | if (!safesub(s->end, &tmp)) { |
200 | 198 | break; |
201 | 198 | } |
202 | 68.6k | if (!safeadd(s->len, &tmp)) { |
203 | 0 | break; |
204 | 0 | } |
205 | 68.6k | if (!safeadd(tmp, &offset)) { |
206 | 6.12k | break; |
207 | 6.12k | } |
208 | 62.5k | s++; |
209 | 62.5k | } else { |
210 | 1.26k | c = cut; |
211 | 1.26k | if (!safesub(offset, &c)) { |
212 | 43 | break; |
213 | 43 | } |
214 | 1.22k | if (s->end < c) { |
215 | 712 | c = s->end; |
216 | 712 | } |
217 | 1.22k | l = cut - offset - s->start; |
218 | 1.22k | if (s->len < l) { |
219 | 0 | l = s->len; |
220 | 0 | } |
221 | | |
222 | 1.22k | offset += s->start + l - c; |
223 | 1.22k | s->start = c; |
224 | 1.22k | s->len = s->len - l; |
225 | 1.22k | s->data = s->data + l; |
226 | | |
227 | 1.22k | break; |
228 | 1.26k | } |
229 | 70.0k | } |
230 | | |
231 | 367k | src->head = s; |
232 | 367k | return offset; |
233 | 367k | } |
234 | | |
235 | | /* combine hunk lists a and b, while adjusting b for offset changes in a/ |
236 | | this deletes a and b and returns the resultant list. */ |
237 | | static struct mpatch_flist *combine(struct mpatch_flist *a, |
238 | | struct mpatch_flist *b) |
239 | 14.4k | { |
240 | 14.4k | struct mpatch_flist *c = NULL; |
241 | 14.4k | struct mpatch_frag *bh, *ct; |
242 | 14.4k | int offset = 0, post; |
243 | | |
244 | 14.4k | if (a && b) { |
245 | 11.6k | c = lalloc((lsize(a) + lsize(b)) * 2); |
246 | 11.6k | } |
247 | | |
248 | 14.4k | if (c) { |
249 | | |
250 | 378k | for (bh = b->head; bh != b->tail; bh++) { |
251 | | /* save old hunks */ |
252 | 367k | offset = gather(c, a, bh->start, offset); |
253 | | |
254 | | /* discard replaced hunks */ |
255 | 367k | post = discard(a, bh->end, offset); |
256 | | |
257 | | /* insert new hunk */ |
258 | 367k | ct = c->tail; |
259 | 367k | ct->start = bh->start; |
260 | 367k | ct->end = bh->end; |
261 | 367k | if (!safesub(offset, &(ct->start)) || |
262 | 366k | !safesub(post, &(ct->end))) { |
263 | | /* It was already possible to exit |
264 | | * this function with a return value |
265 | | * of NULL before the safesub()s were |
266 | | * added, so this should be fine. */ |
267 | 708 | mpatch_lfree(c); |
268 | 708 | c = NULL; |
269 | 708 | goto done; |
270 | 708 | } |
271 | 366k | ct->len = bh->len; |
272 | 366k | ct->data = bh->data; |
273 | 366k | c->tail++; |
274 | 366k | offset = post; |
275 | 366k | } |
276 | | |
277 | | /* hold on to tail from a */ |
278 | 10.9k | memcpy(c->tail, a->head, sizeof(struct mpatch_frag) * lsize(a)); |
279 | 10.9k | c->tail += lsize(a); |
280 | 10.9k | } |
281 | 14.4k | done: |
282 | 14.4k | mpatch_lfree(a); |
283 | 14.4k | mpatch_lfree(b); |
284 | 14.4k | return c; |
285 | 14.4k | } |
286 | | |
287 | | /* decode a binary patch into a hunk list */ |
288 | | int mpatch_decode(const char *bin, ssize_t len, struct mpatch_flist **res) |
289 | 16.3k | { |
290 | 16.3k | struct mpatch_flist *l; |
291 | 16.3k | struct mpatch_frag *lt; |
292 | 16.3k | int pos = 0; |
293 | | |
294 | | /* assume worst case size, we won't have many of these lists */ |
295 | 16.3k | l = lalloc(len / 12 + 1); |
296 | 16.3k | if (!l) { |
297 | 0 | return MPATCH_ERR_NO_MEM; |
298 | 0 | } |
299 | | |
300 | 16.3k | lt = l->tail; |
301 | | |
302 | | /* We check against len-11 to ensure we have at least 12 bytes |
303 | | left in the patch so we can read our three be32s out of it. */ |
304 | 354k | while (pos >= 0 && pos < (len - 11)) { |
305 | 338k | lt->start = getbe32(bin + pos); |
306 | 338k | lt->end = getbe32(bin + pos + 4); |
307 | 338k | lt->len = getbe32(bin + pos + 8); |
308 | 338k | if (lt->start < 0 || lt->start > lt->end || lt->len < 0) { |
309 | 629 | break; /* sanity check */ |
310 | 629 | } |
311 | 337k | if (!safeadd(12, &pos)) { |
312 | 0 | break; |
313 | 0 | } |
314 | 337k | lt->data = bin + pos; |
315 | 337k | if (!safeadd(lt->len, &pos)) { |
316 | 209 | break; |
317 | 209 | } |
318 | 337k | lt++; |
319 | 337k | } |
320 | | |
321 | 16.3k | if (pos != len) { |
322 | 1.70k | mpatch_lfree(l); |
323 | 1.70k | return MPATCH_ERR_CANNOT_BE_DECODED; |
324 | 1.70k | } |
325 | | |
326 | 14.6k | l->tail = lt; |
327 | 14.6k | *res = l; |
328 | 14.6k | return 0; |
329 | 16.3k | } |
330 | | |
331 | | /* calculate the size of resultant text */ |
332 | | ssize_t mpatch_calcsize(ssize_t len, struct mpatch_flist *l) |
333 | 1.10k | { |
334 | 1.10k | ssize_t outlen = 0, last = 0; |
335 | 1.10k | struct mpatch_frag *f = l->head; |
336 | | |
337 | 88.3k | while (f != l->tail) { |
338 | 88.0k | if (f->start < last || f->end > len) { |
339 | 887 | return MPATCH_ERR_INVALID_PATCH; |
340 | 887 | } |
341 | 87.1k | outlen += f->start - last; |
342 | 87.1k | last = f->end; |
343 | 87.1k | outlen += f->len; |
344 | 87.1k | f++; |
345 | 87.1k | } |
346 | | |
347 | 219 | outlen += len - last; |
348 | 219 | return outlen; |
349 | 1.10k | } |
350 | | |
351 | | int mpatch_apply(char *buf, const char *orig, ssize_t len, |
352 | | struct mpatch_flist *l) |
353 | 219 | { |
354 | 219 | struct mpatch_frag *f = l->head; |
355 | 219 | int last = 0; |
356 | 219 | char *p = buf; |
357 | | |
358 | 66.6k | while (f != l->tail) { |
359 | 66.4k | if (f->start < last || f->start > len || f->end > len || |
360 | 66.4k | last < 0) { |
361 | 40 | return MPATCH_ERR_INVALID_PATCH; |
362 | 40 | } |
363 | 66.4k | memcpy(p, orig + last, f->start - last); |
364 | 66.4k | p += f->start - last; |
365 | 66.4k | memcpy(p, f->data, f->len); |
366 | 66.4k | last = f->end; |
367 | 66.4k | p += f->len; |
368 | 66.4k | f++; |
369 | 66.4k | } |
370 | 179 | if (last < 0) { |
371 | 23 | return MPATCH_ERR_INVALID_PATCH; |
372 | 23 | } |
373 | 156 | memcpy(p, orig + last, len - last); |
374 | 156 | return 0; |
375 | 179 | } |
376 | | |
377 | | /* recursively generate a patch of all bins between start and end */ |
378 | | struct mpatch_flist * |
379 | | mpatch_fold(void *bins, struct mpatch_flist *(*get_next_item)(void *, ssize_t), |
380 | | ssize_t start, ssize_t end) |
381 | 30.7k | { |
382 | 30.7k | ssize_t len; |
383 | | |
384 | 30.7k | if (start + 1 == end) { |
385 | | /* trivial case, output a decoded list */ |
386 | 16.3k | return get_next_item(bins, start); |
387 | 16.3k | } |
388 | | |
389 | | /* divide and conquer, memory management is elsewhere */ |
390 | 14.4k | len = (end - start) / 2; |
391 | 14.4k | return combine(mpatch_fold(bins, get_next_item, start, start + len), |
392 | 14.4k | mpatch_fold(bins, get_next_item, start + len, end)); |
393 | 30.7k | } |