1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright 2003-2005 Colin Percival 5 * All rights reserved 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted providing that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/types.h> 30 31 #include <bzlib.h> 32 #include <err.h> 33 #include <errno.h> 34 #include <fcntl.h> 35 #include <limits.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <unistd.h> 41 42 #ifndef O_BINARY 43 #define O_BINARY 0 44 #endif 45 46 #include "divsufsort64.h" 47 #define saidx_t saidx64_t 48 #define divsufsort divsufsort64 49 50 #define MIN(x,y) (((x)<(y)) ? (x) : (y)) 51 52 static off_t matchlen(u_char *old,off_t oldsize,u_char *new,off_t newsize) 53 { 54 off_t i; 55 56 for(i=0;(i<oldsize)&&(i<newsize);i++) 57 if(old[i]!=new[i]) break; 58 59 return i; 60 } 61 62 static off_t search(off_t *I,u_char *old,off_t oldsize, 63 u_char *new,off_t newsize,off_t st,off_t en,off_t *pos) 64 { 65 off_t x,y; 66 67 if(en-st<2) { 68 x=matchlen(old+I[st],oldsize-I[st],new,newsize); 69 y=matchlen(old+I[en],oldsize-I[en],new,newsize); 70 71 if(x>y) { 72 *pos=I[st]; 73 return x; 74 } else { 75 *pos=I[en]; 76 return y; 77 } 78 } 79 80 x=st+(en-st)/2; 81 if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) { 82 return search(I,old,oldsize,new,newsize,x,en,pos); 83 } else { 84 return search(I,old,oldsize,new,newsize,st,x,pos); 85 }; 86 } 87 88 static void offtout(off_t x,u_char *buf) 89 { 90 off_t y; 91 92 if(x<0) y=-x; else y=x; 93 94 buf[0]=y%256;y-=buf[0]; 95 y=y/256;buf[1]=y%256;y-=buf[1]; 96 y=y/256;buf[2]=y%256;y-=buf[2]; 97 y=y/256;buf[3]=y%256;y-=buf[3]; 98 y=y/256;buf[4]=y%256;y-=buf[4]; 99 y=y/256;buf[5]=y%256;y-=buf[5]; 100 y=y/256;buf[6]=y%256;y-=buf[6]; 101 y=y/256;buf[7]=y%256; 102 103 if(x<0) buf[7]|=0x80; 104 } 105 106 static void 107 usage(void) 108 { 109 110 fprintf(stderr, "usage: bsdiff oldfile newfile patchfile\n"); 111 exit(1); 112 } 113 114 int main(int argc,char *argv[]) 115 { 116 int fd; 117 u_char *old,*new; 118 off_t oldsize,newsize; 119 saidx_t *I; 120 off_t scan,pos,len; 121 off_t lastscan,lastpos,lastoffset; 122 off_t oldscore,scsc; 123 off_t s,Sf,lenf,Sb,lenb; 124 off_t overlap,Ss,lens; 125 off_t i; 126 off_t dblen,eblen; 127 u_char *db,*eb; 128 u_char buf[8]; 129 u_char header[32]; 130 FILE * pf; 131 BZFILE * pfbz2; 132 int bz2err; 133 134 if (argc != 4) 135 usage(); 136 137 /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure 138 that we never try to malloc(0) and get a NULL pointer */ 139 if(((fd=open(argv[1],O_RDONLY|O_BINARY,0))<0) || 140 ((oldsize=lseek(fd,0,SEEK_END))==-1)) 141 err(1, "%s", argv[1]); 142 143 if (oldsize > SSIZE_MAX || 144 (uintmax_t)oldsize >= SIZE_T_MAX / sizeof(off_t) || 145 oldsize == OFF_MAX) { 146 errno = EFBIG; 147 err(1, "%s", argv[1]); 148 } 149 150 if (((old=malloc(oldsize+1))==NULL) || 151 (lseek(fd,0,SEEK_SET)!=0) || 152 (read(fd,old,oldsize)!=oldsize) || 153 (close(fd)==-1)) err(1,"%s",argv[1]); 154 155 if(((I=malloc((oldsize+1)*sizeof(saidx_t)))==NULL)) err(1,NULL); 156 157 if(divsufsort(old, I, oldsize)) err(1, "divsufsort"); 158 159 /* Allocate newsize+1 bytes instead of newsize bytes to ensure 160 that we never try to malloc(0) and get a NULL pointer */ 161 if(((fd=open(argv[2],O_RDONLY|O_BINARY,0))<0) || 162 ((newsize=lseek(fd,0,SEEK_END))==-1)) 163 err(1, "%s", argv[2]); 164 165 if (newsize > SSIZE_MAX || (uintmax_t)newsize >= SIZE_T_MAX || 166 newsize == OFF_MAX) { 167 errno = EFBIG; 168 err(1, "%s", argv[2]); 169 } 170 171 if (((new=malloc(newsize+1))==NULL) || 172 (lseek(fd,0,SEEK_SET)!=0) || 173 (read(fd,new,newsize)!=newsize) || 174 (close(fd)==-1)) err(1,"%s",argv[2]); 175 176 if(((db=malloc(newsize+1))==NULL) || 177 ((eb=malloc(newsize+1))==NULL)) err(1,NULL); 178 dblen=0; 179 eblen=0; 180 181 /* Create the patch file */ 182 if ((pf = fopen(argv[3], "wb")) == NULL) 183 err(1, "%s", argv[3]); 184 185 /* Header is 186 0 8 "BSDIFF40" 187 8 8 length of bzip2ed ctrl block 188 16 8 length of bzip2ed diff block 189 24 8 length of new file */ 190 /* File is 191 0 32 Header 192 32 ?? Bzip2ed ctrl block 193 ?? ?? Bzip2ed diff block 194 ?? ?? Bzip2ed extra block */ 195 memcpy(header,"BSDIFF40",8); 196 offtout(0, header + 8); 197 offtout(0, header + 16); 198 offtout(newsize, header + 24); 199 if (fwrite(header, 32, 1, pf) != 1) 200 err(1, "fwrite(%s)", argv[3]); 201 202 /* Compute the differences, writing ctrl as we go */ 203 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 204 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 205 scan=0;len=0;pos=0; 206 lastscan=0;lastpos=0;lastoffset=0; 207 while(scan<newsize) { 208 oldscore=0; 209 210 for(scsc=scan+=len;scan<newsize;scan++) { 211 len=search(I,old,oldsize,new+scan,newsize-scan, 212 0,oldsize-1,&pos); 213 214 for(;scsc<scan+len;scsc++) 215 if((scsc+lastoffset<oldsize) && 216 (old[scsc+lastoffset] == new[scsc])) 217 oldscore++; 218 219 if(((len==oldscore) && (len!=0)) || 220 (len>oldscore+8)) break; 221 222 if((scan+lastoffset<oldsize) && 223 (old[scan+lastoffset] == new[scan])) 224 oldscore--; 225 } 226 227 if((len!=oldscore) || (scan==newsize)) { 228 s=0;Sf=0;lenf=0; 229 for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) { 230 if(old[lastpos+i]==new[lastscan+i]) s++; 231 i++; 232 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; } 233 } 234 235 lenb=0; 236 if(scan<newsize) { 237 s=0;Sb=0; 238 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) { 239 if(old[pos-i]==new[scan-i]) s++; 240 if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; } 241 } 242 } 243 244 if(lastscan+lenf>scan-lenb) { 245 overlap=(lastscan+lenf)-(scan-lenb); 246 s=0;Ss=0;lens=0; 247 for(i=0;i<overlap;i++) { 248 if(new[lastscan+lenf-overlap+i]== 249 old[lastpos+lenf-overlap+i]) s++; 250 if(new[scan-lenb+i]== 251 old[pos-lenb+i]) s--; 252 if(s>Ss) { Ss=s; lens=i+1; } 253 } 254 255 lenf+=lens-overlap; 256 lenb-=lens; 257 } 258 259 for(i=0;i<lenf;i++) 260 db[dblen+i]=new[lastscan+i]-old[lastpos+i]; 261 for(i=0;i<(scan-lenb)-(lastscan+lenf);i++) 262 eb[eblen+i]=new[lastscan+lenf+i]; 263 264 dblen+=lenf; 265 eblen+=(scan-lenb)-(lastscan+lenf); 266 267 offtout(lenf,buf); 268 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 269 if (bz2err != BZ_OK) 270 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 271 272 offtout((scan-lenb)-(lastscan+lenf),buf); 273 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 274 if (bz2err != BZ_OK) 275 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 276 277 offtout((pos-lenb)-(lastpos+lenf),buf); 278 BZ2_bzWrite(&bz2err, pfbz2, buf, 8); 279 if (bz2err != BZ_OK) 280 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 281 282 lastscan=scan-lenb; 283 lastpos=pos-lenb; 284 lastoffset=pos-scan; 285 } 286 } 287 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 288 if (bz2err != BZ_OK) 289 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 290 291 /* Compute size of compressed ctrl data */ 292 if ((len = ftello(pf)) == -1) 293 err(1, "ftello"); 294 offtout(len-32, header + 8); 295 296 /* Write compressed diff data */ 297 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 298 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 299 BZ2_bzWrite(&bz2err, pfbz2, db, dblen); 300 if (bz2err != BZ_OK) 301 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 302 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 303 if (bz2err != BZ_OK) 304 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 305 306 /* Compute size of compressed diff data */ 307 if ((newsize = ftello(pf)) == -1) 308 err(1, "ftello"); 309 offtout(newsize - len, header + 16); 310 311 /* Write compressed extra data */ 312 if ((pfbz2 = BZ2_bzWriteOpen(&bz2err, pf, 9, 0, 0)) == NULL) 313 errx(1, "BZ2_bzWriteOpen, bz2err = %d", bz2err); 314 BZ2_bzWrite(&bz2err, pfbz2, eb, eblen); 315 if (bz2err != BZ_OK) 316 errx(1, "BZ2_bzWrite, bz2err = %d", bz2err); 317 BZ2_bzWriteClose(&bz2err, pfbz2, 0, NULL, NULL); 318 if (bz2err != BZ_OK) 319 errx(1, "BZ2_bzWriteClose, bz2err = %d", bz2err); 320 321 /* Seek to the beginning, write the header, and close the file */ 322 if (fseeko(pf, 0, SEEK_SET)) 323 err(1, "fseeko"); 324 if (fwrite(header, 32, 1, pf) != 1) 325 err(1, "fwrite(%s)", argv[3]); 326 if (fclose(pf)) 327 err(1, "fclose"); 328 329 /* Free the memory we used */ 330 free(db); 331 free(eb); 332 free(I); 333 free(old); 334 free(new); 335 336 return 0; 337 } 338