1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright 2003-2005 Colin Percival 5 * All rights reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted providing that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/types.h> 33 34 #include <bzlib.h> 35 #include <err.h> 36 #include <errno.h> 37 #include <fcntl.h> 38 #include <limits.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 45 #ifndef O_BINARY 46 #define O_BINARY 0 47 #endif 48 49 #include "divsufsort64.h" 50 #define saidx_t saidx64_t 51 #define divsufsort divsufsort64 52 53 #define MIN(x,y) (((x)<(y)) ? (x) : (y)) 54 55 static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) 56 { 57 off_t i; 58 59 for(i=0;(i<oldsize)&&(i<newsize);i++) 60 if(old[i]!=new[i]) break; 61 62 return i; 63 } 64 65 static off_t search(off_t *I,u_char *old,off_t oldsize, 66 u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) 67 { 68 off_t x,y; 69 70 if(en-st<2) { 71 x=matchlen(old+I[st],oldsize-I[st],new,newsize); 72 y=matchlen(old+I[en],oldsize-I[en],new,newsize); 73 74 if(x>y) { 75 *pos=I[st]; 76 return x; 77 } else { 78 *pos=I[en]; 79 return y; 80 } 81 } 82 83 x=st+(en-st)/2; 84 if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) { 85 return search(I,old,oldsize,new,newsize,x,en,pos); 86 } else { 87 return search(I,old,oldsize,new,newsize,st,x,pos); 88 }; 89 } 90 91 static void offtout(off_t x,u_char *buf) 92 { 93 off_t y; 94 95 if(x<0) y=-x; else y=x; 96 97 buf[0]=y%256;y-=buf[0]; 98 y=y/256;buf[1]=y%256;y-=buf[1]; 99 y=y/256;buf[2]=y%256;y-=buf[2]; 100 y=y/256;buf[3]=y%256;y-=buf[3]; 101 y=y/256;buf[4]=y%256;y-=buf[4]; 102 y=y/256;buf[5]=y%256;y-=buf[5]; 103 y=y/256;buf[6]=y%256;y-=buf[6]; 104 y=y/256;buf[7]=y%256; 105 106 if(x<0) buf[7]|=0x80; 107 } 108 109 static void 110 usage(void) 111 { 112 113 fprintf(stderr, "usage: bsdiff oldfile newfile patchfile\n"); 114 exit(1); 115 } 116 117 int main(int argc,char *argv[]) 118 { 119 int fd; 120 u_char *old,*new; 121 off_t oldsize,newsize; 122 saidx_t *I; 123 off_t scan,pos,len; 124 off_t lastscan,lastpos,lastoffset; 125 off_t oldscore,scsc; 126 off_t s,Sf,lenf,Sb,lenb; 127 off_t overlap,Ss,lens; 128 off_t i; 129 off_t dblen,eblen; 130 u_char *db,*eb; 131 u_char buf[8]; 132 u_char header[32]; 133 FILE * pf; 134 BZFILE * pfbz2; 135 int bz2err; 136 137 if (argc != 4) 138 usage(); 139 140 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure 141 that we never try to malloc(0) and get a NULL pointer */ 142 if(((fd=open(argv[1],O_RDONLY|O_BINARY,0))<0) || 143 ((oldsize=lseek(fd,0,SEEK_END))==-1)) 144 err(1, "%s", argv[1]); 145 146 if (oldsize > SSIZE_MAX || 147 (uintmax_t)oldsize >= SIZE_T_MAX / sizeof(off_t) || 148 oldsize == OFF_MAX) { 149 errno = EFBIG; 150 err(1, "%s", argv[1]); 151 } 152 153 if (((old=malloc(oldsize+1))==NULL) || 154 (lseek(fd,0,SEEK_SET)!=0) || 155 (read(fd,old,oldsize)!=oldsize) || 156 (close(fd)==-1)) err(1,"%s",argv[1]); 157 158 if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL); 159 160 if(divsufsort(old, I, oldsize)) err(1, "divsufsort"); 161 162 /* Allocate newsize+1 bytes instead of newsize bytes to ensure 163 that we never try to malloc(0) and get a NULL pointer */ 164 if(((fd=open(argv[2],O_RDONLY|O_BINARY,0))<0) || 165 ((newsize=lseek(fd,0,SEEK_END))==-1)) 166 err(1, "%s", argv[2]); 167 168 if (newsize > SSIZE_MAX || (uintmax_t)newsize >= SIZE_T_MAX || 169 newsize == OFF_MAX) { 170 errno = EFBIG; 171 err(1, "%s", argv[2]); 172 } 173 174 if (((new=malloc(newsize+1))==NULL) || 175 (lseek(fd,0,SEEK_SET)!=0) || 176 (read(fd,new,newsize)!=newsize) || 177 (close(fd)==-1)) err(1,"%s",argv[2]); 178 179 if(((db=malloc(newsize+1))==NULL) || 180 ((eb=malloc(newsize+1))==NULL)) err(1,NULL); 181 dblen=0; 182 eblen=0; 183 184 /* Create the patch file */ 185 if ((pf = fopen(argv[3], "wb")) == NULL) 186 err(1, "%s", argv[3]); 187 188 /* Header is 189 0 8 "BSDIFF40" 190 8 8 length of bzip2ed ctrl block 191 16 8 length of bzip2ed diff block 192 24 8 length of new file */ 193 /* File is 194 0 32 Header 195 32 ?? Bzip2ed ctrl block 196 ?? ?? Bzip2ed diff block 197 ?? ?? Bzip2ed extra block */ 198 memcpy(header,"BSDIFF40",8); 199 offtout(0, header + 8); 200 offtout(0, header + 16); 201 offtout(newsize, header + 24); 202 if (fwrite(header, 32, 1, pf) != 1) 203 err(1, "fwrite(%s)", argv[3]); 204 205 /* Compute the differences, writing ctrl as we go */ 206 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 207 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 208 scan=0;len=0;pos=0; 209 lastscan=0;lastpos=0;lastoffset=0; 210 while(scan<newsize) { 211 oldscore=0; 212 213 for(scsc=scan+=len;scan<newsize;scan++) { 214 len=search(I,old,oldsize,new+scan,newsize-scan, 215 0,oldsize,&pos); 216 217 for(;scsc<scan+len;scsc++) 218 if((scsc+lastoffset<oldsize) && 219 (old[scsc+lastoffset] == new[scsc])) 220 oldscore++; 221 222 if(((len==oldscore) && (len!=0)) || 223 (len>oldscore+8)) break; 224 225 if((scan+lastoffset<oldsize) && 226 (old[scan+lastoffset] == new[scan])) 227 oldscore--; 228 } 229 230 if((len!=oldscore) || (scan==newsize)) { 231 s=0;Sf=0;lenf=0; 232 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { 233 if(old[lastpos+i]==new[lastscan+i]) s++; 234 i++; 235 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; } 236 } 237 238 lenb=0; 239 if(scan<newsize) { 240 s=0;Sb=0; 241 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { 242 if(old[pos-i]==new[scan-i]) s++; 243 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; } 244 } 245 } 246 247 if(lastscan+lenf>scan-lenb) { 248 overlap=(lastscan+lenf)-(scan-lenb); 249 s=0;Ss=0;lens=0; 250 for(i=0;i<overlap;i++) { 251 if(new[lastscan+lenf-overlap+i]== 252 old[lastpos+lenf-overlap+i]) s++; 253 if(new[scan-lenb+i]== 254 old[pos-lenb+i]) s--; 255 if(s>Ss) { Ss=s; lens=i+1; } 256 } 257 258 lenf+=lens-overlap; 259 lenb-=lens; 260 } 261 262 for(i=0;i<lenf;i++) 263 db[dblen+i]=new[lastscan+i]-old[lastpos+i]; 264 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) 265 eb[eblen+i]=new[lastscan+lenf+i]; 266 267 dblen+=lenf; 268 eblen+=(scan-lenb)-(lastscan+lenf); 269 270 offtout(lenf,buf); 271 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 272 if (bz2err != BZ_OK) 273 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 274 275 offtout((scan-lenb)-(lastscan+lenf),buf); 276 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 277 if (bz2err != BZ_OK) 278 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 279 280 offtout((pos-lenb)-(lastpos+lenf),buf); 281 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 282 if (bz2err != BZ_OK) 283 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 284 285 lastscan=scan-lenb; 286 lastpos=pos-lenb; 287 lastoffset=pos-scan; 288 } 289 } 290 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 291 if (bz2err != BZ_OK) 292 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 293 294 /* Compute size of compressed ctrl data */ 295 if ((len = ftello(pf)) == -1) 296 err(1, "ftello"); 297 offtout(len-32, header + 8); 298 299 /* Write compressed diff data */ 300 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 301 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 302 BZ2_bzWrite(&bz2err, pfbz2, db, dblen); 303 if (bz2err != BZ_OK) 304 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 305 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 306 if (bz2err != BZ_OK) 307 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 308 309 /* Compute size of compressed diff data */ 310 if ((newsize = ftello(pf)) == -1) 311 err(1, "ftello"); 312 offtout(newsize - len, header + 16); 313 314 /* Write compressed extra data */ 315 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 316 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 317 BZ2_bzWrite(&bz2err, pfbz2, eb, eblen); 318 if (bz2err != BZ_OK) 319 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 320 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 321 if (bz2err != BZ_OK) 322 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 323 324 /* Seek to the beginning, write the header, and close the file */ 325 if (fseeko(pf, 0, SEEK_SET)) 326 err(1, "fseeko"); 327 if (fwrite(header, 32, 1, pf) != 1) 328 err(1, "fwrite(%s)", argv[3]); 329 if (fclose(pf)) 330 err(1, "fclose"); 331 332 /* Free the memory we used */ 333 free(db); 334 free(eb); 335 free(I); 336 free(old); 337 free(new); 338 339 return 0; 340 } 341