1 /* 2 * Copyright (c) 1989, 1993, 1994 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Michael Rendell of Memorial University of Newfoundland. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * $FreeBSD$ 37 */ 38 39 #ifndef lint 40 static const char copyright[] = 41 "@(#) Copyright (c) 1989, 1993, 1994\n\ 42 The Regents of the University of California. All rights reserved.\n"; 43 #endif /* not lint */ 44 45 #ifndef lint 46 static const char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95"; 47 #endif /* not lint */ 48 49 #include <sys/types.h> 50 51 #include <ctype.h> 52 #include <db.h> 53 #include <err.h> 54 #include <errno.h> 55 #include <fcntl.h> 56 #include <stdio.h> 57 #include <stdlib.h> 58 #include <string.h> 59 #include <unistd.h> 60 61 /* 62 * Topological sort. Input is a list of pairs of strings separated by 63 * white space (spaces, tabs, and/or newlines); strings are written to 64 * standard output in sorted order, one per line. 65 * 66 * usage: 67 * tsort [-dlq] [inputfile] 68 * If no input file is specified, standard input is read. 69 * 70 * Should be compatible with AT&T tsort HOWEVER the output is not identical 71 * (i.e. for most graphs there is more than one sorted order, and this tsort 72 * usually generates a different one then the AT&T tsort). Also, cycle 73 * reporting seems to be more accurate in this version (the AT&T tsort 74 * sometimes says a node is in a cycle when it isn't). 75 * 76 * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 77 */ 78 #define HASHSIZE 53 /* doesn't need to be big */ 79 #define NF_MARK 0x1 /* marker for cycle detection */ 80 #define NF_ACYCLIC 0x2 /* this node is cycle free */ 81 #define NF_NODEST 0x4 /* Unreachable */ 82 83 84 typedef struct node_str NODE; 85 86 struct node_str { 87 NODE **n_prevp; /* pointer to previous node's n_next */ 88 NODE *n_next; /* next node in graph */ 89 NODE **n_arcs; /* array of arcs to other nodes */ 90 int n_narcs; /* number of arcs in n_arcs[] */ 91 int n_arcsize; /* size of n_arcs[] array */ 92 int n_refcnt; /* # of arcs pointing to this node */ 93 int n_flags; /* NF_* */ 94 char n_name[1]; /* name of this node */ 95 }; 96 97 typedef struct _buf { 98 char *b_buf; 99 int b_bsize; 100 } BUF; 101 102 DB *db; 103 NODE *graph, **cycle_buf, **longest_cycle; 104 int debug, longest, quiet; 105 106 void add_arc __P((char *, char *)); 107 int find_cycle __P((NODE *, NODE *, int, int)); 108 NODE *get_node __P((char *)); 109 void *grow_buf __P((void *, int)); 110 void remove_node __P((NODE *)); 111 void clear_cycle __P((void)); 112 void tsort __P((void)); 113 void usage __P((void)); 114 115 int 116 main(argc, argv) 117 int argc; 118 char *argv[]; 119 { 120 register BUF *b; 121 register int c, n; 122 FILE *fp; 123 int bsize, ch, nused; 124 BUF bufs[2]; 125 126 while ((ch = getopt(argc, argv, "dlq")) != -1) 127 switch (ch) { 128 case 'd': 129 debug = 1; 130 break; 131 case 'l': 132 longest = 1; 133 break; 134 case 'q': 135 quiet = 1; 136 break; 137 case '?': 138 default: 139 usage(); 140 } 141 argc -= optind; 142 argv += optind; 143 144 switch (argc) { 145 case 0: 146 fp = stdin; 147 break; 148 case 1: 149 if ((fp = fopen(*argv, "r")) == NULL) 150 err(1, "%s", *argv); 151 break; 152 default: 153 usage(); 154 } 155 156 for (b = bufs, n = 2; --n >= 0; b++) 157 b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 158 159 /* parse input and build the graph */ 160 for (n = 0, c = getc(fp);;) { 161 while (c != EOF && isspace(c)) 162 c = getc(fp); 163 if (c == EOF) 164 break; 165 166 nused = 0; 167 b = &bufs[n]; 168 bsize = b->b_bsize; 169 do { 170 b->b_buf[nused++] = c; 171 if (nused == bsize) 172 b->b_buf = grow_buf(b->b_buf, bsize *= 2); 173 c = getc(fp); 174 } while (c != EOF && !isspace(c)); 175 176 b->b_buf[nused] = '\0'; 177 b->b_bsize = bsize; 178 if (n) 179 add_arc(bufs[0].b_buf, bufs[1].b_buf); 180 n = !n; 181 } 182 (void)fclose(fp); 183 if (n) 184 errx(1, "odd data count"); 185 186 /* do the sort */ 187 tsort(); 188 exit(0); 189 } 190 191 /* double the size of oldbuf and return a pointer to the new buffer. */ 192 void * 193 grow_buf(bp, size) 194 void *bp; 195 int size; 196 { 197 if ((bp = realloc(bp, (u_int)size)) == NULL) 198 err(1, NULL); 199 return (bp); 200 } 201 202 /* 203 * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 204 * the graph, then add them. 205 */ 206 void 207 add_arc(s1, s2) 208 char *s1, *s2; 209 { 210 register NODE *n1; 211 NODE *n2; 212 int bsize, i; 213 214 n1 = get_node(s1); 215 216 if (!strcmp(s1, s2)) 217 return; 218 219 n2 = get_node(s2); 220 221 /* 222 * Check if this arc is already here. 223 */ 224 for (i = 0; i < n1->n_narcs; i++) 225 if (n1->n_arcs[i] == n2) 226 return; 227 /* 228 * Add it. 229 */ 230 if (n1->n_narcs == n1->n_arcsize) { 231 if (!n1->n_arcsize) 232 n1->n_arcsize = 10; 233 bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 234 n1->n_arcs = grow_buf(n1->n_arcs, bsize); 235 n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 236 } 237 n1->n_arcs[n1->n_narcs++] = n2; 238 ++n2->n_refcnt; 239 } 240 241 /* Find a node in the graph (insert if not found) and return a pointer to it. */ 242 NODE * 243 get_node(name) 244 char *name; 245 { 246 DBT data, key; 247 NODE *n; 248 249 if (db == NULL && 250 (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 251 err(1, "db: %s", name); 252 253 key.data = name; 254 key.size = strlen(name) + 1; 255 256 switch ((*db->get)(db, &key, &data, 0)) { 257 case 0: 258 bcopy(data.data, &n, sizeof(n)); 259 return (n); 260 case 1: 261 break; 262 default: 263 case -1: 264 err(1, "db: %s", name); 265 } 266 267 if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 268 err(1, NULL); 269 270 n->n_narcs = 0; 271 n->n_arcsize = 0; 272 n->n_arcs = NULL; 273 n->n_refcnt = 0; 274 n->n_flags = 0; 275 bcopy(name, n->n_name, key.size); 276 277 /* Add to linked list. */ 278 if ((n->n_next = graph) != NULL) 279 graph->n_prevp = &n->n_next; 280 n->n_prevp = &graph; 281 graph = n; 282 283 /* Add to hash table. */ 284 data.data = &n; 285 data.size = sizeof(n); 286 if ((*db->put)(db, &key, &data, 0)) 287 err(1, "db: %s", name); 288 return (n); 289 } 290 291 292 /* 293 * Clear the NODEST flag from all nodes. 294 */ 295 void 296 clear_cycle() 297 { 298 NODE *n; 299 300 for (n = graph; n != NULL; n = n->n_next) 301 n->n_flags &= ~NF_NODEST; 302 } 303 304 /* do topological sort on graph */ 305 void 306 tsort() 307 { 308 register NODE *n, *next; 309 register int cnt, i; 310 311 while (graph != NULL) { 312 /* 313 * Keep getting rid of simple cases until there are none left, 314 * if there are any nodes still in the graph, then there is 315 * a cycle in it. 316 */ 317 do { 318 for (cnt = 0, n = graph; n != NULL; n = next) { 319 next = n->n_next; 320 if (n->n_refcnt == 0) { 321 remove_node(n); 322 ++cnt; 323 } 324 } 325 } while (graph != NULL && cnt); 326 327 if (graph == NULL) 328 break; 329 330 if (!cycle_buf) { 331 /* 332 * Allocate space for two cycle logs - one to be used 333 * as scratch space, the other to save the longest 334 * cycle. 335 */ 336 for (cnt = 0, n = graph; n != NULL; n = n->n_next) 337 ++cnt; 338 cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 339 longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 340 if (cycle_buf == NULL || longest_cycle == NULL) 341 err(1, NULL); 342 } 343 for (n = graph; n != NULL; n = n->n_next) 344 if (!(n->n_flags & NF_ACYCLIC)) { 345 if ((cnt = find_cycle(n, n, 0, 0))) { 346 if (!quiet) { 347 warnx("cycle in data"); 348 for (i = 0; i < cnt; i++) 349 warnx("%s", 350 longest_cycle[i]->n_name); 351 } 352 remove_node(n); 353 clear_cycle(); 354 break; 355 } else { 356 /* to avoid further checks */ 357 n->n_flags |= NF_ACYCLIC; 358 clear_cycle(); 359 } 360 } 361 362 if (n == NULL) 363 errx(1, "internal error -- could not find cycle"); 364 } 365 } 366 367 /* print node and remove from graph (does not actually free node) */ 368 void 369 remove_node(n) 370 register NODE *n; 371 { 372 register NODE **np; 373 register int i; 374 375 (void)printf("%s\n", n->n_name); 376 for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 377 --(*np)->n_refcnt; 378 n->n_narcs = 0; 379 *n->n_prevp = n->n_next; 380 if (n->n_next) 381 n->n_next->n_prevp = n->n_prevp; 382 } 383 384 385 /* look for the longest? cycle from node from to node to. */ 386 int 387 find_cycle(from, to, longest_len, depth) 388 NODE *from, *to; 389 int depth, longest_len; 390 { 391 register NODE **np; 392 register int i, len; 393 394 /* 395 * avoid infinite loops and ignore portions of the graph known 396 * to be acyclic 397 */ 398 if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) 399 return (0); 400 from->n_flags |= NF_MARK; 401 402 for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 403 cycle_buf[depth] = *np; 404 if (*np == to) { 405 if (depth + 1 > longest_len) { 406 longest_len = depth + 1; 407 (void)memcpy((char *)longest_cycle, 408 (char *)cycle_buf, 409 longest_len * sizeof(NODE *)); 410 } 411 } else { 412 if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) 413 continue; 414 len = find_cycle(*np, to, longest_len, depth + 1); 415 416 if (debug) 417 (void)printf("%*s %s->%s %d\n", depth, "", 418 from->n_name, to->n_name, len); 419 420 if (len == 0) 421 (*np)->n_flags |= NF_NODEST; 422 423 if (len > longest_len) 424 longest_len = len; 425 426 if (len > 0 && !longest) 427 break; 428 } 429 } 430 from->n_flags &= ~NF_MARK; 431 return (longest_len); 432 } 433 434 void 435 usage() 436 { 437 (void)fprintf(stderr, "usage: tsort [-dlq] [file]\n"); 438 exit(1); 439 } 440