1 1.26 dbj /* $NetBSD: tsort.c,v 1.26 2021/02/06 16:02:16 dbj Exp $ */ 2 1.9 jtc 3 1.1 cgd /* 4 1.9 jtc * Copyright (c) 1989, 1993, 1994 5 1.7 cgd * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * This code is derived from software contributed to Berkeley by 8 1.1 cgd * Michael Rendell of Memorial University of Newfoundland. 9 1.1 cgd * 10 1.1 cgd * Redistribution and use in source and binary forms, with or without 11 1.1 cgd * modification, are permitted provided that the following conditions 12 1.1 cgd * are met: 13 1.1 cgd * 1. Redistributions of source code must retain the above copyright 14 1.1 cgd * notice, this list of conditions and the following disclaimer. 15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 cgd * notice, this list of conditions and the following disclaimer in the 17 1.1 cgd * documentation and/or other materials provided with the distribution. 18 1.18 agc * 3. Neither the name of the University nor the names of its contributors 19 1.1 cgd * may be used to endorse or promote products derived from this software 20 1.1 cgd * without specific prior written permission. 21 1.1 cgd * 22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 cgd * SUCH DAMAGE. 33 1.1 cgd */ 34 1.1 cgd 35 1.20 lukem #if HAVE_NBTOOL_CONFIG_H 36 1.20 lukem #include "nbtool_config.h" 37 1.20 lukem #endif 38 1.20 lukem 39 1.12 lukem #include <sys/cdefs.h> 40 1.20 lukem #if !defined(lint) 41 1.22 lukem __COPYRIGHT("@(#) Copyright (c) 1989, 1993, 1994\ 42 1.22 lukem The Regents of the University of California. All rights reserved."); 43 1.9 jtc #if 0 44 1.10 jtc static char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95"; 45 1.9 jtc #endif 46 1.26 dbj __RCSID("$NetBSD: tsort.c,v 1.26 2021/02/06 16:02:16 dbj Exp $"); 47 1.1 cgd #endif /* not lint */ 48 1.17 tv 49 1.1 cgd #include <sys/types.h> 50 1.9 jtc #include <ctype.h> 51 1.9 jtc #include <db.h> 52 1.17 tv #include <err.h> 53 1.1 cgd #include <errno.h> 54 1.7 cgd #include <fcntl.h> 55 1.1 cgd #include <stdio.h> 56 1.7 cgd #include <stdlib.h> 57 1.1 cgd #include <string.h> 58 1.10 jtc #include <unistd.h> 59 1.25 christos #if !HAVE_NBTOOL_CONFIG_H 60 1.24 christos #include <util.h> 61 1.25 christos #endif 62 1.1 cgd 63 1.1 cgd /* 64 1.7 cgd * Topological sort. Input is a list of pairs of strings separated by 65 1.1 cgd * white space (spaces, tabs, and/or newlines); strings are written to 66 1.1 cgd * standard output in sorted order, one per line. 67 1.1 cgd * 68 1.1 cgd * usage: 69 1.7 cgd * tsort [-l] [inputfile] 70 1.1 cgd * If no input file is specified, standard input is read. 71 1.1 cgd * 72 1.15 wiz * Should be compatible with AT&T tsort HOWEVER the output is not identical 73 1.1 cgd * (i.e. for most graphs there is more than one sorted order, and this tsort 74 1.26 dbj * usually generates a different one than the AT&T tsort). Also, cycle 75 1.1 cgd * reporting seems to be more accurate in this version (the AT&T tsort 76 1.1 cgd * sometimes says a node is in a cycle when it isn't). 77 1.1 cgd * 78 1.1 cgd * Michael Rendell, michael (at) stretch.cs.mun.ca - Feb 26, '90 79 1.1 cgd */ 80 1.1 cgd #define HASHSIZE 53 /* doesn't need to be big */ 81 1.1 cgd #define NF_MARK 0x1 /* marker for cycle detection */ 82 1.1 cgd #define NF_ACYCLIC 0x2 /* this node is cycle free */ 83 1.7 cgd #define NF_NODEST 0x4 /* Unreachable */ 84 1.7 cgd 85 1.1 cgd typedef struct node_str NODE; 86 1.1 cgd 87 1.1 cgd struct node_str { 88 1.1 cgd NODE **n_prevp; /* pointer to previous node's n_next */ 89 1.1 cgd NODE *n_next; /* next node in graph */ 90 1.7 cgd NODE **n_arcs; /* array of arcs to other nodes */ 91 1.24 christos size_t n_narcs; /* number of arcs in n_arcs[] */ 92 1.24 christos size_t n_arcsize; /* size of n_arcs[] array */ 93 1.24 christos size_t n_refcnt; /* # of arcs pointing to this node */ 94 1.1 cgd int n_flags; /* NF_* */ 95 1.7 cgd char n_name[1]; /* name of this node */ 96 1.1 cgd }; 97 1.1 cgd 98 1.1 cgd typedef struct _buf { 99 1.1 cgd char *b_buf; 100 1.24 christos size_t b_bsize; 101 1.1 cgd } BUF; 102 1.1 cgd 103 1.23 joerg static DB *db; 104 1.23 joerg static NODE *graph, **cycle_buf, **longest_cycle; 105 1.24 christos static int debug, longest, quiet, reverse; 106 1.23 joerg 107 1.24 christos static void add_arc(const char *, const char *); 108 1.23 joerg static void clear_cycle(void); 109 1.24 christos static size_t find_cycle(NODE *, NODE *, size_t, size_t); 110 1.24 christos static NODE *get_node(const char *); 111 1.23 joerg static void remove_node(NODE *); 112 1.23 joerg static void tsort(void); 113 1.23 joerg __dead static void usage(void); 114 1.1 cgd 115 1.7 cgd int 116 1.23 joerg main(int argc, char *argv[]) 117 1.1 cgd { 118 1.12 lukem BUF *b; 119 1.24 christos int c, n, ch; 120 1.1 cgd FILE *fp; 121 1.24 christos size_t bsize, nused; 122 1.1 cgd BUF bufs[2]; 123 1.14 cgd 124 1.14 cgd setprogname(argv[0]); 125 1.1 cgd 126 1.12 lukem fp = NULL; 127 1.24 christos while ((ch = getopt(argc, argv, "dlqr")) != -1) 128 1.9 jtc switch (ch) { 129 1.7 cgd case 'd': 130 1.9 jtc debug = 1; 131 1.9 jtc break; 132 1.7 cgd case 'l': 133 1.9 jtc longest = 1; 134 1.9 jtc break; 135 1.11 mycroft case 'q': 136 1.11 mycroft quiet = 1; 137 1.11 mycroft break; 138 1.24 christos case 'r': 139 1.24 christos reverse = 1; 140 1.24 christos break; 141 1.7 cgd case '?': 142 1.7 cgd default: 143 1.7 cgd usage(); 144 1.7 cgd } 145 1.7 cgd argc -= optind; 146 1.7 cgd argv += optind; 147 1.7 cgd 148 1.9 jtc switch (argc) { 149 1.7 cgd case 0: 150 1.1 cgd fp = stdin; 151 1.7 cgd break; 152 1.7 cgd case 1: 153 1.7 cgd if ((fp = fopen(*argv, "r")) == NULL) 154 1.24 christos err(EXIT_FAILURE, "%s", *argv); 155 1.7 cgd break; 156 1.7 cgd default: 157 1.7 cgd usage(); 158 1.1 cgd } 159 1.1 cgd 160 1.24 christos for (b = bufs, n = 2; n-- > 0; b++) 161 1.24 christos b->b_buf = erealloc(NULL, b->b_bsize = 1024); 162 1.1 cgd 163 1.1 cgd /* parse input and build the graph */ 164 1.1 cgd for (n = 0, c = getc(fp);;) { 165 1.1 cgd while (c != EOF && isspace(c)) 166 1.1 cgd c = getc(fp); 167 1.1 cgd if (c == EOF) 168 1.1 cgd break; 169 1.1 cgd 170 1.1 cgd nused = 0; 171 1.1 cgd b = &bufs[n]; 172 1.1 cgd bsize = b->b_bsize; 173 1.1 cgd do { 174 1.24 christos b->b_buf[nused++] = (char)c; 175 1.7 cgd if (nused == bsize) 176 1.24 christos b->b_buf = erealloc(b->b_buf, bsize *= 2); 177 1.1 cgd c = getc(fp); 178 1.1 cgd } while (c != EOF && !isspace(c)); 179 1.1 cgd 180 1.1 cgd b->b_buf[nused] = '\0'; 181 1.1 cgd b->b_bsize = bsize; 182 1.24 christos if (n) { 183 1.24 christos if (reverse) 184 1.24 christos add_arc(bufs[1].b_buf, bufs[0].b_buf); 185 1.24 christos else 186 1.24 christos add_arc(bufs[0].b_buf, bufs[1].b_buf); 187 1.24 christos } 188 1.1 cgd n = !n; 189 1.1 cgd } 190 1.1 cgd (void)fclose(fp); 191 1.7 cgd if (n) 192 1.24 christos errx(EXIT_FAILURE, "odd data count"); 193 1.1 cgd 194 1.1 cgd /* do the sort */ 195 1.1 cgd tsort(); 196 1.24 christos return EXIT_SUCCESS; 197 1.1 cgd } 198 1.1 cgd 199 1.1 cgd /* 200 1.1 cgd * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 201 1.1 cgd * the graph, then add them. 202 1.1 cgd */ 203 1.23 joerg static void 204 1.24 christos add_arc(const char *s1, const char *s2) 205 1.1 cgd { 206 1.12 lukem NODE *n1; 207 1.1 cgd NODE *n2; 208 1.24 christos size_t bsize, i; 209 1.1 cgd 210 1.7 cgd n1 = get_node(s1); 211 1.1 cgd 212 1.24 christos if (strcmp(s1, s2) == 0) 213 1.1 cgd return; 214 1.1 cgd 215 1.7 cgd n2 = get_node(s2); 216 1.1 cgd 217 1.1 cgd /* 218 1.3 cgd * Check if this arc is already here. 219 1.3 cgd */ 220 1.3 cgd for (i = 0; i < n1->n_narcs; i++) 221 1.3 cgd if (n1->n_arcs[i] == n2) 222 1.3 cgd return; 223 1.3 cgd /* 224 1.3 cgd * Add it. 225 1.1 cgd */ 226 1.1 cgd if (n1->n_narcs == n1->n_arcsize) { 227 1.1 cgd if (!n1->n_arcsize) 228 1.1 cgd n1->n_arcsize = 10; 229 1.1 cgd bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 230 1.24 christos n1->n_arcs = erealloc(n1->n_arcs, bsize); 231 1.1 cgd n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 232 1.1 cgd } 233 1.1 cgd n1->n_arcs[n1->n_narcs++] = n2; 234 1.1 cgd ++n2->n_refcnt; 235 1.1 cgd } 236 1.1 cgd 237 1.7 cgd /* Find a node in the graph (insert if not found) and return a pointer to it. */ 238 1.23 joerg static NODE * 239 1.24 christos get_node(const char *name) 240 1.1 cgd { 241 1.7 cgd DBT data, key; 242 1.7 cgd NODE *n; 243 1.1 cgd 244 1.7 cgd if (db == NULL && 245 1.7 cgd (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 246 1.24 christos err(EXIT_FAILURE, "db: %s", name); 247 1.7 cgd 248 1.24 christos key.data = __UNCONST(name); 249 1.7 cgd key.size = strlen(name) + 1; 250 1.7 cgd 251 1.9 jtc switch ((*db->get)(db, &key, &data, 0)) { 252 1.7 cgd case 0: 253 1.21 christos (void)memmove(&n, data.data, sizeof(n)); 254 1.24 christos return n; 255 1.7 cgd case 1: 256 1.7 cgd break; 257 1.7 cgd default: 258 1.7 cgd case -1: 259 1.24 christos err(EXIT_FAILURE, "db: %s", name); 260 1.7 cgd } 261 1.1 cgd 262 1.24 christos n = emalloc(sizeof(NODE) + key.size); 263 1.1 cgd 264 1.1 cgd n->n_narcs = 0; 265 1.1 cgd n->n_arcsize = 0; 266 1.7 cgd n->n_arcs = NULL; 267 1.1 cgd n->n_refcnt = 0; 268 1.1 cgd n->n_flags = 0; 269 1.21 christos (void)memmove(n->n_name, name, key.size); 270 1.1 cgd 271 1.7 cgd /* Add to linked list. */ 272 1.9 jtc if ((n->n_next = graph) != NULL) 273 1.1 cgd graph->n_prevp = &n->n_next; 274 1.1 cgd n->n_prevp = &graph; 275 1.1 cgd graph = n; 276 1.1 cgd 277 1.7 cgd /* Add to hash table. */ 278 1.7 cgd data.data = &n; 279 1.7 cgd data.size = sizeof(n); 280 1.7 cgd if ((*db->put)(db, &key, &data, 0)) 281 1.24 christos err(EXIT_FAILURE, "db: %s", name); 282 1.24 christos return n; 283 1.7 cgd } 284 1.7 cgd 285 1.7 cgd 286 1.7 cgd /* 287 1.7 cgd * Clear the NODEST flag from all nodes. 288 1.7 cgd */ 289 1.23 joerg static void 290 1.23 joerg clear_cycle(void) 291 1.7 cgd { 292 1.7 cgd NODE *n; 293 1.7 cgd 294 1.9 jtc for (n = graph; n != NULL; n = n->n_next) 295 1.7 cgd n->n_flags &= ~NF_NODEST; 296 1.1 cgd } 297 1.1 cgd 298 1.1 cgd /* do topological sort on graph */ 299 1.23 joerg static void 300 1.23 joerg tsort(void) 301 1.1 cgd { 302 1.12 lukem NODE *n, *next; 303 1.24 christos size_t cnt, i; 304 1.1 cgd 305 1.9 jtc while (graph != NULL) { 306 1.1 cgd /* 307 1.7 cgd * Keep getting rid of simple cases until there are none left, 308 1.1 cgd * if there are any nodes still in the graph, then there is 309 1.1 cgd * a cycle in it. 310 1.1 cgd */ 311 1.1 cgd do { 312 1.9 jtc for (cnt = 0, n = graph; n != NULL; n = next) { 313 1.1 cgd next = n->n_next; 314 1.1 cgd if (n->n_refcnt == 0) { 315 1.1 cgd remove_node(n); 316 1.1 cgd ++cnt; 317 1.1 cgd } 318 1.1 cgd } 319 1.9 jtc } while (graph != NULL && cnt); 320 1.1 cgd 321 1.9 jtc if (graph == NULL) 322 1.1 cgd break; 323 1.1 cgd 324 1.1 cgd if (!cycle_buf) { 325 1.1 cgd /* 326 1.7 cgd * Allocate space for two cycle logs - one to be used 327 1.1 cgd * as scratch space, the other to save the longest 328 1.1 cgd * cycle. 329 1.1 cgd */ 330 1.9 jtc for (cnt = 0, n = graph; n != NULL; n = n->n_next) 331 1.1 cgd ++cnt; 332 1.24 christos cycle_buf = ecalloc(cnt, sizeof(*cycle_buf)); 333 1.24 christos longest_cycle = ecalloc(cnt, sizeof(*longest_cycle)); 334 1.1 cgd } 335 1.13 ross for (n = graph; n != NULL; n = n->n_next) { 336 1.13 ross if (!(n->n_flags & NF_ACYCLIC)) { 337 1.12 lukem if ((cnt = find_cycle(n, n, 0, 0)) != 0) { 338 1.11 mycroft if (!quiet) { 339 1.11 mycroft warnx("cycle in data"); 340 1.11 mycroft for (i = 0; i < cnt; i++) 341 1.11 mycroft warnx("%s", 342 1.11 mycroft longest_cycle[i]->n_name); 343 1.11 mycroft } 344 1.1 cgd remove_node(n); 345 1.7 cgd clear_cycle(); 346 1.1 cgd break; 347 1.7 cgd } else { 348 1.1 cgd /* to avoid further checks */ 349 1.7 cgd n->n_flags |= NF_ACYCLIC; 350 1.7 cgd clear_cycle(); 351 1.7 cgd } 352 1.13 ross } 353 1.13 ross } 354 1.9 jtc if (n == NULL) 355 1.9 jtc errx(1, "internal error -- could not find cycle"); 356 1.1 cgd } 357 1.1 cgd } 358 1.1 cgd 359 1.1 cgd /* print node and remove from graph (does not actually free node) */ 360 1.23 joerg static void 361 1.23 joerg remove_node(NODE *n) 362 1.1 cgd { 363 1.12 lukem NODE **np; 364 1.24 christos size_t i; 365 1.1 cgd 366 1.1 cgd (void)printf("%s\n", n->n_name); 367 1.24 christos for (np = n->n_arcs, i = n->n_narcs; i-- > 0; np++) 368 1.1 cgd --(*np)->n_refcnt; 369 1.1 cgd n->n_narcs = 0; 370 1.1 cgd *n->n_prevp = n->n_next; 371 1.1 cgd if (n->n_next) 372 1.1 cgd n->n_next->n_prevp = n->n_prevp; 373 1.1 cgd } 374 1.1 cgd 375 1.7 cgd 376 1.7 cgd /* look for the longest? cycle from node from to node to. */ 377 1.24 christos static size_t 378 1.24 christos find_cycle(NODE *from, NODE *to, size_t longest_len, size_t depth) 379 1.1 cgd { 380 1.12 lukem NODE **np; 381 1.24 christos size_t i, len; 382 1.1 cgd 383 1.1 cgd /* 384 1.1 cgd * avoid infinite loops and ignore portions of the graph known 385 1.1 cgd * to be acyclic 386 1.1 cgd */ 387 1.7 cgd if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) 388 1.24 christos return 0; 389 1.7 cgd from->n_flags |= NF_MARK; 390 1.1 cgd 391 1.24 christos for (np = from->n_arcs, i = from->n_narcs; i-- > 0; np++) { 392 1.1 cgd cycle_buf[depth] = *np; 393 1.1 cgd if (*np == to) { 394 1.1 cgd if (depth + 1 > longest_len) { 395 1.1 cgd longest_len = depth + 1; 396 1.21 christos (void)memcpy(longest_cycle, cycle_buf, 397 1.1 cgd longest_len * sizeof(NODE *)); 398 1.1 cgd } 399 1.1 cgd } else { 400 1.7 cgd if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) 401 1.7 cgd continue; 402 1.1 cgd len = find_cycle(*np, to, longest_len, depth + 1); 403 1.7 cgd 404 1.7 cgd if (debug) 405 1.24 christos (void)printf("%*s %s->%s %zu\n", (int)depth, "", 406 1.9 jtc from->n_name, to->n_name, len); 407 1.7 cgd 408 1.7 cgd if (len == 0) 409 1.9 jtc (*np)->n_flags |= NF_NODEST; 410 1.7 cgd 411 1.7 cgd if (len > longest_len) 412 1.1 cgd longest_len = len; 413 1.7 cgd 414 1.7 cgd if (len > 0 && !longest) 415 1.5 cgd break; 416 1.1 cgd } 417 1.1 cgd } 418 1.7 cgd from->n_flags &= ~NF_MARK; 419 1.24 christos return longest_len; 420 1.1 cgd } 421 1.1 cgd 422 1.23 joerg static void 423 1.23 joerg usage(void) 424 1.7 cgd { 425 1.24 christos (void)fprintf(stderr, "Usage: %s [-dlqr] [file]\n", getprogname()); 426 1.24 christos exit(EXIT_FAILURE); 427 1.1 cgd } 428