/src/ostree/bsdiff/bsdiff.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright 2003-2005 Colin Percival |
3 | | * Copyright 2012 Matthew Endsley |
4 | | * All rights reserved |
5 | | * |
6 | | * Redistribution and use in source and binary forms, with or without |
7 | | * modification, are permitted providing that the following conditions |
8 | | * are met: |
9 | | * 1. Redistributions of source code must retain the above copyright |
10 | | * notice, this list of conditions and the following disclaimer. |
11 | | * 2. Redistributions in binary form must reproduce the above copyright |
12 | | * notice, this list of conditions and the following disclaimer in the |
13 | | * documentation and/or other materials provided with the distribution. |
14 | | * |
15 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
16 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
17 | | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
18 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
19 | | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
20 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
21 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
22 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, |
23 | | * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING |
24 | | * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
25 | | * POSSIBILITY OF SUCH DAMAGE. |
26 | | */ |
27 | | |
28 | | #include "bsdiff.h" |
29 | | |
30 | | #include <limits.h> |
31 | | #include <string.h> |
32 | | |
33 | 4.27M | #define MIN(x,y) (((x)<(y)) ? (x) : (y)) |
34 | | |
35 | | static void split(int64_t *I,int64_t *V,int64_t start,int64_t len,int64_t h) |
36 | 37.7M | { |
37 | 37.7M | int64_t i,j,k,x,tmp,jj,kk; |
38 | | |
39 | 37.7M | if(len<16) { |
40 | 121M | for(k=start;k<start+len;k+=j) { |
41 | 94.3M | j=1;x=V[I[k]+h]; |
42 | 458M | for(i=1;k+i<start+len;i++) { |
43 | 364M | if(V[I[k+i]+h]<x) { |
44 | 190M | x=V[I[k+i]+h]; |
45 | 190M | j=0; |
46 | 190M | }; |
47 | 364M | if(V[I[k+i]+h]==x) { |
48 | 221M | tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp; |
49 | 221M | j++; |
50 | 221M | }; |
51 | 364M | }; |
52 | 214M | for(i=0;i<j;i++) V[I[k+i]]=k+j-1; |
53 | 94.3M | if(j==1) I[k]=-1; |
54 | 94.3M | }; |
55 | 27.5M | return; |
56 | 27.5M | }; |
57 | | |
58 | 10.1M | x=V[I[start+len/2]+h]; |
59 | 10.1M | jj=0;kk=0; |
60 | 8.75G | for(i=start;i<start+len;i++) { |
61 | 8.74G | if(V[I[i]+h]<x) jj++; |
62 | 8.74G | if(V[I[i]+h]==x) kk++; |
63 | 8.74G | }; |
64 | 10.1M | jj+=start;kk+=jj; |
65 | | |
66 | 10.1M | i=start;j=0;k=0; |
67 | 5.37G | while(i<jj) { |
68 | 5.36G | if(V[I[i]+h]<x) { |
69 | 3.89G | i++; |
70 | 3.89G | } else if(V[I[i]+h]==x) { |
71 | 792M | tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp; |
72 | 792M | j++; |
73 | 792M | } else { |
74 | 674M | tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp; |
75 | 674M | k++; |
76 | 674M | }; |
77 | 5.36G | }; |
78 | | |
79 | 1.79G | while(jj+j<kk) { |
80 | 1.78G | if(V[I[jj+j]+h]==x) { |
81 | 378M | j++; |
82 | 1.40G | } else { |
83 | 1.40G | tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp; |
84 | 1.40G | k++; |
85 | 1.40G | }; |
86 | 1.78G | }; |
87 | | |
88 | 10.1M | if(jj>start) split(I,V,start,jj-start,h); |
89 | | |
90 | 1.18G | for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1; |
91 | 10.1M | if(jj==kk-1) I[jj]=-1; |
92 | | |
93 | 10.1M | if(start+len>kk) split(I,V,kk,start+len-kk,h); |
94 | 10.1M | } |
95 | | |
96 | | static void qsufsort(int64_t *I,int64_t *V,const uint8_t *old,int64_t oldsize) |
97 | 1.19k | { |
98 | 1.19k | int64_t buckets[256]; |
99 | 1.19k | int64_t i,h,len; |
100 | | |
101 | 306k | for(i=0;i<256;i++) buckets[i]=0; |
102 | 85.4M | for(i=0;i<oldsize;i++) buckets[old[i]]++; |
103 | 304k | for(i=1;i<256;i++) buckets[i]+=buckets[i-1]; |
104 | 304k | for(i=255;i>0;i--) buckets[i]=buckets[i-1]; |
105 | 1.19k | buckets[0]=0; |
106 | | |
107 | 85.4M | for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i; |
108 | 1.19k | I[0]=oldsize; |
109 | 85.4M | for(i=0;i<oldsize;i++) V[i]=buckets[old[i]]; |
110 | 1.19k | V[oldsize]=0; |
111 | 304k | for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1; |
112 | 1.19k | I[0]=-1; |
113 | | |
114 | 9.42k | for(h=1;I[0]!=-(oldsize+1);h+=h) { |
115 | 8.23k | len=0; |
116 | 117M | for(i=0;i<oldsize+1;) { |
117 | 117M | if(I[i]<0) { |
118 | 98.7M | len-=I[i]; |
119 | 98.7M | i-=I[i]; |
120 | 98.7M | } else { |
121 | 19.0M | if(len) I[i-len]=-len; |
122 | 19.0M | len=V[I[i]]+1-i; |
123 | 19.0M | split(I,V,i,len,h); |
124 | 19.0M | i+=len; |
125 | 19.0M | len=0; |
126 | 19.0M | }; |
127 | 117M | }; |
128 | 8.23k | if(len) I[i-len]=-len; |
129 | 8.23k | }; |
130 | | |
131 | 85.4M | for(i=0;i<oldsize+1;i++) I[V[i]]=i; |
132 | 1.19k | } |
133 | | |
134 | | static int64_t matchlen(const uint8_t *old,int64_t oldsize,const uint8_t *new,int64_t newsize) |
135 | 1.12M | { |
136 | 1.12M | int64_t i; |
137 | | |
138 | 1.57M | for(i=0;(i<oldsize)&&(i<newsize);i++) |
139 | 1.50M | if(old[i]!=new[i]) break; |
140 | | |
141 | 1.12M | return i; |
142 | 1.12M | } |
143 | | |
144 | | static int64_t search(const int64_t *I,const uint8_t *old,int64_t oldsize, |
145 | | const uint8_t *new,int64_t newsize,int64_t st,int64_t en,int64_t *pos) |
146 | 4.82M | { |
147 | 4.82M | int64_t x,y; |
148 | | |
149 | 4.82M | if(en-st<2) { |
150 | 560k | x=matchlen(old+I[st],oldsize-I[st],new,newsize); |
151 | 560k | y=matchlen(old+I[en],oldsize-I[en],new,newsize); |
152 | | |
153 | 560k | if(x>y) { |
154 | 16.1k | *pos=I[st]; |
155 | 16.1k | return x; |
156 | 544k | } else { |
157 | 544k | *pos=I[en]; |
158 | 544k | return y; |
159 | 544k | } |
160 | 4.26M | }; |
161 | | |
162 | 4.26M | x=st+(en-st)/2; |
163 | 4.26M | if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) { |
164 | 3.10M | return search(I,old,oldsize,new,newsize,x,en,pos); |
165 | 3.10M | } else { |
166 | 1.16M | return search(I,old,oldsize,new,newsize,st,x,pos); |
167 | 1.16M | }; |
168 | 0 | } |
169 | | |
170 | | static void offtout(int64_t x,uint8_t *buf) |
171 | 13.4k | { |
172 | 13.4k | int64_t y; |
173 | | |
174 | 13.4k | if(x<0) y=-x; else y=x; |
175 | | |
176 | 13.4k | buf[0]=y%256;y-=buf[0]; |
177 | 13.4k | y=y/256;buf[1]=y%256;y-=buf[1]; |
178 | 13.4k | y=y/256;buf[2]=y%256;y-=buf[2]; |
179 | 13.4k | y=y/256;buf[3]=y%256;y-=buf[3]; |
180 | 13.4k | y=y/256;buf[4]=y%256;y-=buf[4]; |
181 | 13.4k | y=y/256;buf[5]=y%256;y-=buf[5]; |
182 | 13.4k | y=y/256;buf[6]=y%256;y-=buf[6]; |
183 | 13.4k | y=y/256;buf[7]=y%256; |
184 | | |
185 | 13.4k | if(x<0) buf[7]|=0x80; |
186 | 13.4k | } |
187 | | |
188 | | static int64_t writedata(struct bsdiff_stream* stream, const void* buffer, int64_t length) |
189 | 13.4k | { |
190 | 13.4k | int64_t result = 0; |
191 | | |
192 | 24.8k | while (length > 0) |
193 | 11.4k | { |
194 | 11.4k | const int smallsize = (int)MIN(length, INT_MAX); |
195 | 11.4k | const int writeresult = stream->write(stream, buffer, smallsize); |
196 | 11.4k | if (writeresult == -1) |
197 | 0 | { |
198 | 0 | return -1; |
199 | 0 | } |
200 | | |
201 | 11.4k | result += writeresult; |
202 | 11.4k | length -= smallsize; |
203 | 11.4k | buffer = (uint8_t*)buffer + smallsize; |
204 | 11.4k | } |
205 | | |
206 | 13.4k | return result; |
207 | 13.4k | } |
208 | | |
209 | | struct bsdiff_request |
210 | | { |
211 | | const uint8_t* old; |
212 | | int64_t oldsize; |
213 | | const uint8_t* new; |
214 | | int64_t newsize; |
215 | | struct bsdiff_stream* stream; |
216 | | int64_t *I; |
217 | | uint8_t *buffer; |
218 | | }; |
219 | | |
220 | | static int bsdiff_internal(const struct bsdiff_request req) |
221 | 1.19k | { |
222 | 1.19k | int64_t *I,*V; |
223 | 1.19k | int64_t scan,pos,len; |
224 | 1.19k | int64_t lastscan,lastpos,lastoffset; |
225 | 1.19k | int64_t oldscore,scsc; |
226 | 1.19k | int64_t s,Sf,lenf,Sb,lenb; |
227 | 1.19k | int64_t overlap,Ss,lens; |
228 | 1.19k | int64_t i; |
229 | 1.19k | uint8_t *buffer; |
230 | 1.19k | uint8_t buf[8 * 3]; |
231 | | |
232 | 1.19k | if((V=req.stream->malloc((req.oldsize+1)*sizeof(int64_t)))==NULL) return -1; |
233 | 1.19k | I = req.I; |
234 | | |
235 | 1.19k | qsufsort(I,V,req.old,req.oldsize); |
236 | 1.19k | req.stream->free(V); |
237 | | |
238 | 1.19k | buffer = req.buffer; |
239 | | |
240 | | /* Compute the differences, writing ctrl as we go */ |
241 | 1.19k | scan=0;len=0;pos=0; |
242 | 1.19k | lastscan=0;lastpos=0;lastoffset=0; |
243 | 7.07k | while(scan<req.newsize) { |
244 | 5.88k | oldscore=0; |
245 | | |
246 | 561k | for(scsc=scan+=len;scan<req.newsize;scan++) { |
247 | 560k | len=search(I,req.old,req.oldsize,req.new+scan,req.newsize-scan, |
248 | 560k | 0,req.oldsize,&pos); |
249 | | |
250 | 1.19M | for(;scsc<scan+len;scsc++) |
251 | 637k | if((scsc+lastoffset<req.oldsize) && |
252 | 248k | (req.old[scsc+lastoffset] == req.new[scsc])) |
253 | 16.9k | oldscore++; |
254 | | |
255 | 560k | if(((len==oldscore) && (len!=0)) || |
256 | 558k | (len>oldscore+8)) break; |
257 | | |
258 | 555k | if((scan+lastoffset<req.oldsize) && |
259 | 191k | (req.old[scan+lastoffset] == req.new[scan])) |
260 | 2.29k | oldscore--; |
261 | 555k | }; |
262 | | |
263 | 5.88k | if((len!=oldscore) || (scan==req.newsize)) { |
264 | 4.47k | s=0;Sf=0;lenf=0; |
265 | 288k | for(i=0;(lastscan+i<scan)&&(lastpos+i<req.oldsize);) { |
266 | 284k | if(req.old[lastpos+i]==req.new[lastscan+i]) s++; |
267 | 284k | i++; |
268 | 284k | if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; }; |
269 | 284k | }; |
270 | | |
271 | 4.47k | lenb=0; |
272 | 4.47k | if(scan<req.newsize) { |
273 | 3.28k | s=0;Sb=0; |
274 | 103k | for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { |
275 | 100k | if(req.old[pos-i]==req.new[scan-i]) s++; |
276 | 100k | if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; }; |
277 | 100k | }; |
278 | 3.28k | }; |
279 | | |
280 | 4.47k | if(lastscan+lenf>scan-lenb) { |
281 | 525 | overlap=(lastscan+lenf)-(scan-lenb); |
282 | 525 | s=0;Ss=0;lens=0; |
283 | 12.3k | for(i=0;i<overlap;i++) { |
284 | 11.8k | if(req.new[lastscan+lenf-overlap+i]== |
285 | 11.8k | req.old[lastpos+lenf-overlap+i]) s++; |
286 | 11.8k | if(req.new[scan-lenb+i]== |
287 | 11.8k | req.old[pos-lenb+i]) s--; |
288 | 11.8k | if(s>Ss) { Ss=s; lens=i+1; }; |
289 | 11.8k | }; |
290 | | |
291 | 525 | lenf+=lens-overlap; |
292 | 525 | lenb-=lens; |
293 | 525 | }; |
294 | | |
295 | 4.47k | offtout(lenf,buf); |
296 | 4.47k | offtout((scan-lenb)-(lastscan+lenf),buf+8); |
297 | 4.47k | offtout((pos-lenb)-(lastpos+lenf),buf+16); |
298 | | |
299 | | /* Write control data */ |
300 | 4.47k | if (writedata(req.stream, buf, sizeof(buf))) |
301 | 0 | return -1; |
302 | | |
303 | | /* Write diff data */ |
304 | 91.1k | for(i=0;i<lenf;i++) |
305 | 86.6k | buffer[i]=req.new[lastscan+i]-req.old[lastpos+i]; |
306 | 4.47k | if (writedata(req.stream, buffer, lenf)) |
307 | 0 | return -1; |
308 | | |
309 | | /* Write extra data */ |
310 | 556k | for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) |
311 | 551k | buffer[i]=req.new[lastscan+lenf+i]; |
312 | 4.47k | if (writedata(req.stream, buffer, (scan-lenb)-(lastscan+lenf))) |
313 | 0 | return -1; |
314 | | |
315 | 4.47k | lastscan=scan-lenb; |
316 | 4.47k | lastpos=pos-lenb; |
317 | 4.47k | lastoffset=pos-scan; |
318 | 5.88k | }; |
319 | 5.88k | }; |
320 | | |
321 | 1.19k | return 0; |
322 | 1.19k | } |
323 | | |
324 | | int bsdiff(const uint8_t* old, int64_t oldsize, const uint8_t* new, int64_t newsize, struct bsdiff_stream* stream) |
325 | 1.19k | { |
326 | 1.19k | int result; |
327 | 1.19k | struct bsdiff_request req; |
328 | | |
329 | 1.19k | if((req.I=stream->malloc((oldsize+1)*sizeof(int64_t)))==NULL) |
330 | 0 | return -1; |
331 | | |
332 | 1.19k | if((req.buffer=stream->malloc(newsize+1))==NULL) |
333 | 0 | { |
334 | 0 | stream->free(req.I); |
335 | 0 | return -1; |
336 | 0 | } |
337 | | |
338 | 1.19k | req.old = old; |
339 | 1.19k | req.oldsize = oldsize; |
340 | 1.19k | req.new = new; |
341 | 1.19k | req.newsize = newsize; |
342 | 1.19k | req.stream = stream; |
343 | | |
344 | 1.19k | result = bsdiff_internal(req); |
345 | | |
346 | 1.19k | stream->free(req.buffer); |
347 | 1.19k | stream->free(req.I); |
348 | | |
349 | 1.19k | return result; |
350 | 1.19k | } |
351 | | |
352 | | #if defined(BSDIFF_EXECUTABLE) |
353 | | |
354 | | #include <sys/types.h> |
355 | | |
356 | | #include <bzlib.h> |
357 | | #include <err.h> |
358 | | #include <fcntl.h> |
359 | | #include <stdio.h> |
360 | | #include <stdlib.h> |
361 | | #include <unistd.h> |
362 | | |
363 | | static int bz2_write(struct bsdiff_stream* stream, const void* buffer, int size) |
364 | | { |
365 | | int bz2err; |
366 | | BZFILE* bz2; |
367 | | |
368 | | bz2 = (BZFILE*)stream->opaque; |
369 | | BZ2_bzWrite(&bz2err, bz2, (void*)buffer, size); |
370 | | if (bz2err != BZ_STREAM_END && bz2err != BZ_OK) |
371 | | return -1; |
372 | | |
373 | | return 0; |
374 | | } |
375 | | |
376 | | int main(int argc,char *argv[]) |
377 | | { |
378 | | int fd; |
379 | | int bz2err; |
380 | | uint8_t *old,*new; |
381 | | off_t oldsize,newsize; |
382 | | uint8_t buf[8]; |
383 | | FILE * pf; |
384 | | struct bsdiff_stream stream; |
385 | | BZFILE* bz2; |
386 | | |
387 | | memset(&bz2, 0, sizeof(bz2)); |
388 | | stream.malloc = malloc; |
389 | | stream.free = free; |
390 | | stream.write = bz2_write; |
391 | | |
392 | | if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]); |
393 | | |
394 | | /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure |
395 | | that we never try to malloc(0) and get a NULL pointer */ |
396 | | if(((fd=open(argv[1],O_RDONLY,0))<0) || |
397 | | ((oldsize=lseek(fd,0,SEEK_END))==-1) || |
398 | | ((old=malloc(oldsize+1))==NULL) || |
399 | | (lseek(fd,0,SEEK_SET)!=0) || |
400 | | (read(fd,old,oldsize)!=oldsize) || |
401 | | (close(fd)==-1)) err(1,"%s",argv[1]); |
402 | | |
403 | | |
404 | | /* Allocate newsize+1 bytes instead of newsize bytes to ensure |
405 | | that we never try to malloc(0) and get a NULL pointer */ |
406 | | if(((fd=open(argv[2],O_RDONLY,0))<0) || |
407 | | ((newsize=lseek(fd,0,SEEK_END))==-1) || |
408 | | ((new=malloc(newsize+1))==NULL) || |
409 | | (lseek(fd,0,SEEK_SET)!=0) || |
410 | | (read(fd,new,newsize)!=newsize) || |
411 | | (close(fd)==-1)) err(1,"%s",argv[2]); |
412 | | |
413 | | /* Create the patch file */ |
414 | | if ((pf = fopen(argv[3], "w")) == NULL) |
415 | | err(1, "%s", argv[3]); |
416 | | |
417 | | /* Write header (signature+newsize)*/ |
418 | | offtout(newsize, buf); |
419 | | if (fwrite("ENDSLEY/BSDIFF43", 16, 1, pf) != 1 || |
420 | | fwrite(buf, sizeof(buf), 1, pf) != 1) |
421 | | err(1, "Failed to write header"); |
422 | | |
423 | | |
424 | | if (NULL == (bz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0))) |
425 | | errx(1, "BZ2_bzWriteOpen, bz2err=%d", bz2err); |
426 | | |
427 | | stream.opaque = bz2; |
428 | | if (bsdiff(old, oldsize, new, newsize, &stream)) |
429 | | err(1, "bsdiff"); |
430 | | |
431 | | BZ2_bzWriteClose(&bz2err, bz2, 0, NULL, NULL); |
432 | | if (bz2err != BZ_OK) |
433 | | err(1, "BZ2_bzWriteClose, bz2err=%d", bz2err); |
434 | | |
435 | | if (fclose(pf)) |
436 | | err(1, "fclose"); |
437 | | |
438 | | /* Free the memory we used */ |
439 | | free(old); |
440 | | free(new); |
441 | | |
442 | | return 0; |
443 | | } |
444 | | |
445 | | #endif |