/*
 * Copyright (c) 1994 University of Maryland
 * All Rights Reserved.
 *
 * Permission to use, copy, modify, distribute, and sell this software and its
 * documentation for any purpose is hereby granted without fee, provided that
 * the above copyright notice appear in all copies and that both that
 * copyright notice and this permission notice appear in supporting
 * documentation, and that the name of U.M. not be used in advertising or
 * publicity pertaining to distribution of the software without specific,
 * written prior permission.  U.M. makes no representations about the
 * suitability of this software for any purpose.  It is provided "as is"
 * without express or implied warranty.
 *
 * U.M. DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL U.M.
 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Author: James da Silva, Systems Design and Analysis Group
 *         Computer Science Department
 *         University of Maryland at College Park
 */
/*
 * crunchide.c - tiptoes through an a.out symbol table, hiding all defined
 *      global symbols.  Allows the user to supply a "keep list" of symbols
 *      that are not to be hidden.  This program relies on the use of the
 *      linker's -dc flag to actually put global bss data into the file's
 *      bss segment (rather than leaving it as undefined "common" data).
 *
 *      The point of all this is to allow multiple programs to be linked
 *      together without getting multiple-defined errors.
 *
 *      For example, consider a program "foo.c".  It can be linked with a
 *      small stub routine, called "foostub.c", e.g.:
 *          int foo_main(int argc, char **argv){ return main(argc, argv); }
 *      like so:
 *          cc -c foo.c foostub.c
 *          ld -dc -r foo.o foostub.o -o foo.combined.o
 *          crunchide -k _foo_main foo.combined.o
 *      at this point, foo.combined.o can be linked with another program
 *      and invoked with "foo_main(argc, argv)".  foo's main() and any
 *      other globals are hidden and will not conflict with other symbols.
 *
 * TODO:
 *      - resolve the theoretical hanging reloc problem (see check_reloc()
 *        below). I have yet to see this problem actually occur in any real
 *        program. In what cases will gcc/gas generate code that needs a
 *        relative reloc from a global symbol, other than PIC?  The
 *        solution is to not hide the symbol from the linker in this case,
 *        but to generate some random name for it so that it doesn't link
 *        with anything but holds the place for the reloc.
 *      - arrange that all the BSS segments start at the same address, so
 *        that the final crunched binary BSS size is the max of all the
 *        component programs' BSS sizes, rather than their sum.
58 */ 59 #include <unistd.h> 60 #include <stdio.h> 61 #include <stdlib.h> 62 #include <string.h> 63 #include <fcntl.h> 64 #include <a.out.h> 65 #include <sys/types.h> 66 #include <sys/stat.h> 67 #include <sys/errno.h> 68 69 char *pname = "crunchide"; 70 71 void usage(void); 72 73 void add_to_keep_list(char *symbol); 74 void add_file_to_keep_list(char *filename); 75 76 void hide_syms(char *filename); 77 78 79 int main(argc, argv) 80 int argc; 81 char **argv; 82 { 83 int ch; 84 85 if(argc > 0) pname = argv[0]; 86 87 while ((ch = getopt(argc, argv, "k:f:")) != -1) 88 switch(ch) { 89 case 'k': 90 add_to_keep_list(optarg); 91 break; 92 case 'f': 93 add_file_to_keep_list(optarg); 94 break; 95 default: 96 usage(); 97 } 98 99 argc -= optind; 100 argv += optind; 101 102 if(argc == 0) usage(); 103 104 while(argc) { 105 hide_syms(*argv); 106 argc--, argv++; 107 } 108 109 return 0; 110 } 111 112 void usage(void) 113 { 114 fprintf(stderr, 115 "Usage: %s [-k <symbol-name>] [-f <keep-list-file>] <files> ...\n", 116 pname); 117 exit(1); 118 } 119 120 /* ---------------------------- */ 121 122 struct keep { 123 struct keep *next; 124 char *sym; 125 } *keep_list; 126 127 void add_to_keep_list(char *symbol) 128 { 129 struct keep *newp, *prevp, *curp; 130 int cmp; 131 132 for(curp = keep_list, prevp = NULL; curp; prevp = curp, curp = curp->next) 133 if((cmp = strcmp(symbol, curp->sym)) <= 0) break; 134 135 if(curp && cmp == 0) 136 return; /* already in table */ 137 138 newp = (struct keep *) malloc(sizeof(struct keep)); 139 if(newp) newp->sym = strdup(symbol); 140 if(newp == NULL || newp->sym == NULL) { 141 fprintf(stderr, "%s: out of memory for keep list\n", pname); 142 exit(1); 143 } 144 145 newp->next = curp; 146 if(prevp) prevp->next = newp; 147 else keep_list = newp; 148 } 149 150 int in_keep_list(char *symbol) 151 { 152 struct keep *curp; 153 int cmp; 154 155 for(curp = keep_list; curp; curp = curp->next) 156 if((cmp = strcmp(symbol, curp->sym)) <= 0) break; 157 158 return curp 
&& cmp == 0; 159 } 160 161 void add_file_to_keep_list(char *filename) 162 { 163 FILE *keepf; 164 char symbol[1024]; 165 int len; 166 167 if((keepf = fopen(filename, "r")) == NULL) { 168 perror(filename); 169 usage(); 170 } 171 172 while(fgets(symbol, 1024, keepf)) { 173 len = strlen(symbol); 174 if(len && symbol[len-1] == '\n') 175 symbol[len-1] = '\0'; 176 177 add_to_keep_list(symbol); 178 } 179 fclose(keepf); 180 } 181 182 /* ---------------------- */ 183 184 int nsyms, ntextrel, ndatarel; 185 struct exec *hdrp; 186 char *aoutdata, *strbase; 187 struct relocation_info *textrel, *datarel; 188 struct nlist *symbase; 189 190 191 #define SYMSTR(sp) &strbase[(sp)->n_un.n_strx] 192 193 /* is the symbol a global symbol defined in the current file? */ 194 #define IS_GLOBAL_DEFINED(sp) \ 195 (((sp)->n_type & N_EXT) && ((sp)->n_type & N_TYPE) != N_UNDF) 196 197 /* is the relocation entry dependent on a symbol? */ 198 #define IS_SYMBOL_RELOC(rp) \ 199 ((rp)->r_extern||(rp)->r_baserel||(rp)->r_jmptable) 200 201 void check_reloc(char *filename, struct relocation_info *relp); 202 203 void hide_syms(char *filename) 204 { 205 int inf, outf, rc; 206 struct stat infstat; 207 struct relocation_info *relp; 208 struct nlist *symp; 209 210 /* 211 * Open the file and do some error checking. 212 */ 213 214 if((inf = open(filename, O_RDWR)) == -1) { 215 perror(filename); 216 return; 217 } 218 219 if(fstat(inf, &infstat) == -1) { 220 perror(filename); 221 close(inf); 222 return; 223 } 224 225 if(infstat.st_size < sizeof(struct exec)) { 226 fprintf(stderr, "%s: short file\n", filename); 227 close(inf); 228 return; 229 } 230 231 /* 232 * Read the entire file into memory. XXX - Really, we only need to 233 * read the header and from TRELOFF to the end of the file. 
234 */ 235 236 if((aoutdata = (char *) malloc(infstat.st_size)) == NULL) { 237 fprintf(stderr, "%s: too big to read into memory\n", filename); 238 close(inf); 239 return; 240 } 241 242 if((rc = read(inf, aoutdata, infstat.st_size)) < infstat.st_size) { 243 fprintf(stderr, "%s: read error: %s\n", filename, 244 rc == -1? strerror(errno) : "short read"); 245 close(inf); 246 return; 247 } 248 249 /* 250 * Check the header and calculate offsets and sizes from it. 251 */ 252 253 hdrp = (struct exec *) aoutdata; 254 255 if(N_BADMAG(*hdrp)) { 256 fprintf(stderr, "%s: bad magic: not an a.out file\n", filename); 257 close(inf); 258 return; 259 } 260 261 #ifdef __FreeBSD__ 262 textrel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp)); 263 datarel = (struct relocation_info *) (aoutdata + N_RELOFF(*hdrp) + 264 hdrp->a_trsize); 265 #else 266 textrel = (struct relocation_info *) (aoutdata + N_TRELOFF(*hdrp)); 267 datarel = (struct relocation_info *) (aoutdata + N_DRELOFF(*hdrp)); 268 #endif 269 symbase = (struct nlist *) (aoutdata + N_SYMOFF(*hdrp)); 270 strbase = (char *) (aoutdata + N_STROFF(*hdrp)); 271 272 ntextrel = hdrp->a_trsize / sizeof(struct relocation_info); 273 ndatarel = hdrp->a_drsize / sizeof(struct relocation_info); 274 nsyms = hdrp->a_syms / sizeof(struct nlist); 275 276 /* 277 * Zap the type field of all globally-defined symbols. The linker will 278 * subsequently ignore these entries. Don't zap any symbols in the 279 * keep list. 280 */ 281 282 for(symp = symbase; symp < symbase + nsyms; symp++) 283 if(IS_GLOBAL_DEFINED(symp) && !in_keep_list(SYMSTR(symp))) 284 symp->n_type = 0; 285 286 /* 287 * Check whether the relocation entries reference any symbols that we 288 * just zapped. I don't know whether ld can handle this case, but I 289 * haven't encountered it yet. These checks are here so that the program 290 * doesn't fail silently should such symbols be encountered. 
291 */ 292 293 for(relp = textrel; relp < textrel + ntextrel; relp++) 294 check_reloc(filename, relp); 295 for(relp = datarel; relp < datarel + ndatarel; relp++) 296 check_reloc(filename, relp); 297 298 /* 299 * Write the .o file back out to disk. XXX - Really, we only need to 300 * write the symbol table entries back out. 301 */ 302 lseek(inf, 0, SEEK_SET); 303 if((rc = write(inf, aoutdata, infstat.st_size)) < infstat.st_size) { 304 fprintf(stderr, "%s: write error: %s\n", filename, 305 rc == -1? strerror(errno) : "short write"); 306 } 307 308 close(inf); 309 } 310 311 312 void check_reloc(char *filename, struct relocation_info *relp) 313 { 314 /* bail out if we zapped a symbol that is needed */ 315 if(IS_SYMBOL_RELOC(relp) && symbase[relp->r_symbolnum].n_type == 0) { 316 fprintf(stderr, 317 "%s: oops, have hanging relocation for %s: bailing out!\n", 318 filename, SYMSTR(&symbase[relp->r_symbolnum])); 319 exit(1); 320 } 321 } 322