1 1.64 christos /* $NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $ */ 2 1.34 jdolecek 3 1.34 jdolecek /*- 4 1.34 jdolecek * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 1.34 jdolecek * All rights reserved. 6 1.34 jdolecek * 7 1.34 jdolecek * This code is derived from software contributed to The NetBSD Foundation 8 1.34 jdolecek * by Ben Harris and Jaromir Dolecek. 9 1.34 jdolecek * 10 1.34 jdolecek * Redistribution and use in source and binary forms, with or without 11 1.34 jdolecek * modification, are permitted provided that the following conditions 12 1.34 jdolecek * are met: 13 1.34 jdolecek * 1. Redistributions of source code must retain the above copyright 14 1.34 jdolecek * notice, this list of conditions and the following disclaimer. 15 1.34 jdolecek * 2. Redistributions in binary form must reproduce the above copyright 16 1.34 jdolecek * notice, this list of conditions and the following disclaimer in the 17 1.34 jdolecek * documentation and/or other materials provided with the distribution. 18 1.34 jdolecek * 19 1.34 jdolecek * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.34 jdolecek * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.34 jdolecek * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.34 jdolecek * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.34 jdolecek * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.34 jdolecek * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.34 jdolecek * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.34 jdolecek * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.34 jdolecek * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.34 jdolecek * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.34 jdolecek * POSSIBILITY OF SUCH DAMAGE. 30 1.34 jdolecek */ 31 1.2 bjh21 32 1.1 bjh21 /*- 33 1.1 bjh21 * Copyright (c) 1993 34 1.1 bjh21 * The Regents of the University of California. All rights reserved. 35 1.1 bjh21 * 36 1.1 bjh21 * This code is derived from software contributed to Berkeley by 37 1.1 bjh21 * Peter McIlroy. 38 1.1 bjh21 * 39 1.1 bjh21 * Redistribution and use in source and binary forms, with or without 40 1.1 bjh21 * modification, are permitted provided that the following conditions 41 1.1 bjh21 * are met: 42 1.1 bjh21 * 1. Redistributions of source code must retain the above copyright 43 1.1 bjh21 * notice, this list of conditions and the following disclaimer. 44 1.1 bjh21 * 2. Redistributions in binary form must reproduce the above copyright 45 1.1 bjh21 * notice, this list of conditions and the following disclaimer in the 46 1.1 bjh21 * documentation and/or other materials provided with the distribution. 47 1.33 agc * 3. Neither the name of the University nor the names of its contributors 48 1.1 bjh21 * may be used to endorse or promote products derived from this software 49 1.1 bjh21 * without specific prior written permission. 50 1.1 bjh21 * 51 1.1 bjh21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 1.1 bjh21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 1.1 bjh21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 1.1 bjh21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 1.1 bjh21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 1.1 bjh21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 1.1 bjh21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 1.1 bjh21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 1.1 bjh21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 1.1 bjh21 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 1.1 bjh21 * SUCH DAMAGE. 62 1.1 bjh21 */ 63 1.64 christos #include <sys/cdefs.h> 64 1.64 christos #ifndef lint 65 1.64 christos __COPYRIGHT("@(#) Copyright (c) 1993\ 66 1.64 christos The Regents of the University of California. All rights reserved."); 67 1.64 christos #endif /* not lint */ 68 1.64 christos __RCSID("$NetBSD: sort.c,v 1.64 2017/01/10 21:13:45 christos Exp $"); 69 1.1 bjh21 70 1.1 bjh21 /* Sort sorts a file using an optional user-defined key. 71 1.1 bjh21 * Sort uses radix sort for internal sorting, and allows 72 1.1 bjh21 * a choice of merge sort and radix sort for external sorting. 73 1.1 bjh21 */ 74 1.1 bjh21 75 1.26 ross #include <sys/types.h> 76 1.26 ross #include <sys/time.h> 77 1.64 christos #include <sys/stat.h> 78 1.26 ross #include <sys/resource.h> 79 1.26 ross 80 1.64 christos #include <locale.h> 81 1.1 bjh21 #include <paths.h> 82 1.1 bjh21 #include <signal.h> 83 1.1 bjh21 #include <stdlib.h> 84 1.1 bjh21 #include <string.h> 85 1.1 bjh21 #include <unistd.h> 86 1.64 christos #include <util.h> 87 1.64 christos 88 1.64 christos #include "sort.h" 89 1.64 christos #include "fsort.h" 90 1.64 christos #include "pathnames.h" 91 1.1 bjh21 92 1.1 bjh21 int REC_D = '\n'; 93 1.1 bjh21 u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 94 1.30 jdolecek 95 1.1 bjh21 /* 96 1.1 bjh21 * weight tables. Gweights is one of ascii, Rascii.. 97 1.1 bjh21 * modified to weight rec_d = 0 (or 255) 98 1.1 bjh21 */ 99 1.52 dsl u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 100 1.1 bjh21 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 101 1.55 dsl 102 1.1 bjh21 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103 1.55 dsl int REVERSE = 0; 104 1.55 dsl int posix_sort; 105 1.1 bjh21 106 1.52 dsl unsigned int debug_flags = 0; 107 1.52 dsl 108 1.30 jdolecek static char toutpath[MAXPATHLEN]; 109 1.10 jdolecek 110 1.22 jdolecek const char *tmpdir; /* where temporary files should be put */ 111 1.22 jdolecek 112 1.49 dsl static void cleanup(void); 113 1.49 dsl static void onsignal(int); 114 1.61 joerg __dead static void usage(const char *); 115 1.2 bjh21 116 1.1 bjh21 int 117 1.49 dsl main(int argc, char *argv[]) 118 1.1 bjh21 { 119 1.54 dsl int ch, i, stdinflag = 0; 120 1.62 kre char mode = 0; 121 1.1 bjh21 char *outfile, *outpath = 0; 122 1.55 dsl struct field *fldtab; 123 1.54 dsl size_t fldtab_sz, fld_cnt; 124 1.13 jdolecek struct filelist filelist; 125 1.51 dsl int num_input_files; 126 1.3 bjh21 FILE *outfp = NULL; 127 1.31 jdolecek struct rlimit rl; 128 1.43 jdolecek struct stat st; 129 1.10 jdolecek 130 1.11 jdolecek setlocale(LC_ALL, ""); 131 1.11 jdolecek 132 1.31 jdolecek /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 133 1.31 jdolecek if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 134 1.31 jdolecek err(2, "getrlimit"); 135 1.31 jdolecek rl.rlim_cur = rl.rlim_max; 136 1.31 jdolecek if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 137 1.31 jdolecek err(2, "setrlimit"); 138 1.31 jdolecek 139 1.1 bjh21 d_mask[REC_D = '\n'] = REC_D_F; 140 1.1 bjh21 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 141 1.35 jdolecek 142 1.54 dsl /* fldtab[0] is the global options. */ 143 1.54 dsl fldtab_sz = 3; 144 1.54 dsl fld_cnt = 0; 145 1.55 dsl fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); 146 1.36 itojun memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 147 1.10 jdolecek 148 1.62 kre #define SORT_OPTS "bcCdD:fHik:lmno:rR:sSt:T:u" 149 1.59 dholland 150 1.62 kre /* Convert "+field" args to -k format */ 151 1.59 dholland fixit(&argc, argv, SORT_OPTS); 152 1.35 jdolecek 153 1.22 jdolecek if (!(tmpdir = getenv("TMPDIR"))) 154 1.22 jdolecek tmpdir = _PATH_TMP; 155 1.10 jdolecek 156 1.59 dholland while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 157 1.10 jdolecek switch (ch) { 158 1.11 jdolecek case 'b': 159 1.54 dsl fldtab[0].flags |= BI | BT; 160 1.11 jdolecek break; 161 1.62 kre case 'c': case 'C': case 'm': 162 1.62 kre if (mode) 163 1.62 kre usage("Incompatible operation modes"); 164 1.62 kre mode = ch; 165 1.1 bjh21 break; 166 1.52 dsl case 'D': /* Debug flags */ 167 1.52 dsl for (i = 0; optarg[i]; i++) 168 1.52 dsl debug_flags |= 1 << (optarg[i] & 31); 169 1.52 dsl break; 170 1.60 christos case 'd': case 'f': case 'i': case 'n': case 'l': 171 1.54 dsl fldtab[0].flags |= optval(ch, 0); 172 1.1 bjh21 break; 173 1.11 jdolecek case 'H': 174 1.50 dsl /* -H was ; use merge sort for blocks of large files' */ 175 1.50 dsl /* That is now the default. */ 176 1.11 jdolecek break; 177 1.11 jdolecek case 'k': 178 1.55 dsl fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 179 1.54 dsl memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 180 1.35 jdolecek fldtab_sz++; 181 1.35 jdolecek 182 1.54 dsl setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 183 1.11 jdolecek break; 184 1.1 bjh21 case 'o': 185 1.1 bjh21 outpath = optarg; 186 1.1 bjh21 break; 187 1.55 dsl case 'r': 188 1.55 dsl REVERSE = 1; 189 1.55 dsl break; 190 1.62 kre case 'R': 191 1.62 kre if (REC_D != '\n') 192 1.62 kre usage("multiple record delimiters"); 193 1.62 kre REC_D = *optarg; 194 1.62 kre if (optarg[1] != '\0') { 195 1.62 kre char *ep; 196 1.62 kre int t = 0; 197 1.62 kre 198 1.62 kre if (optarg[0] == '\\') 199 1.62 kre optarg++, t = 8; 200 1.62 kre REC_D = (int)strtol(optarg, &ep, t); 201 1.62 kre if (*ep != '\0' || REC_D < 0 || 202 1.62 kre REC_D >= (int)__arraycount(d_mask)) 203 1.62 kre errx(2, "invalid record delimiter %s", 204 1.62 kre optarg); 205 1.62 kre } 206 1.62 kre if (REC_D == '\n') 207 1.62 kre break; 208 1.62 kre d_mask['\n'] = d_mask[' ']; 209 1.62 kre d_mask[REC_D] = REC_D_F; 210 1.62 kre break; 211 1.9 jdolecek case 's': 212 1.54 dsl /* 213 1.54 dsl * Nominally 'stable sort', keep lines with equal keys 214 1.54 dsl * in input file order. (Default for NetBSD) 215 1.54 dsl * (-s for GNU sort compatibility.) 216 1.54 dsl */ 217 1.55 dsl posix_sort = 0; 218 1.9 jdolecek break; 219 1.9 jdolecek case 'S': 220 1.54 dsl /* 221 1.54 dsl * Reverse of -s! 222 1.54 dsl * This needs to enforce a POSIX sort where records 223 1.54 dsl * with equal keys are then sorted by the raw data. 224 1.54 dsl * Currently not implemented! 225 1.54 dsl * (using libc radixsort() v sradixsort() doesn't 226 1.54 dsl * have the desired effect.) 227 1.54 dsl */ 228 1.55 dsl posix_sort = 1; 229 1.1 bjh21 break; 230 1.1 bjh21 case 't': 231 1.1 bjh21 if (SEP_FLAG) 232 1.1 bjh21 usage("multiple field delimiters"); 233 1.1 bjh21 SEP_FLAG = 1; 234 1.1 bjh21 d_mask[' '] &= ~FLD_D; 235 1.1 bjh21 d_mask['\t'] &= ~FLD_D; 236 1.62 kre d_mask['\n'] &= ~FLD_D; 237 1.2 bjh21 d_mask[(u_char)*optarg] |= FLD_D; 238 1.2 bjh21 if (d_mask[(u_char)*optarg] & REC_D_F) 239 1.19 jdolecek errx(2, "record/field delimiter clash"); 240 1.1 bjh21 break; 241 1.21 jdolecek case 'T': 242 1.23 jdolecek /* -T tmpdir */ 243 1.23 jdolecek tmpdir = optarg; 244 1.21 jdolecek break; 245 1.1 bjh21 case 'u': 246 1.1 bjh21 UNIQUE = 1; 247 1.1 bjh21 break; 248 1.1 bjh21 case '?': 249 1.10 jdolecek default: 250 1.17 soren usage(NULL); 251 1.1 bjh21 } 252 1.1 bjh21 } 253 1.51 dsl 254 1.55 dsl if (UNIQUE) 255 1.55 dsl /* Don't sort on raw record if keys match */ 256 1.55 dsl posix_sort = 0; 257 1.55 dsl 258 1.62 kre if ((mode == 'c' || mode == 'C') && argc > optind+1) 259 1.1 bjh21 errx(2, "too many input files for -c option"); 260 1.1 bjh21 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 261 1.1 bjh21 outpath = argv[argc-1]; 262 1.1 bjh21 argc -= 2; 263 1.1 bjh21 } 264 1.62 kre if (mode == 'm' && argc - optind > (MAXFCT - (16+1))*16) 265 1.1 bjh21 errx(2, "too many input files for -m option"); 266 1.51 dsl 267 1.1 bjh21 for (i = optind; i < argc; i++) { 268 1.1 bjh21 /* allow one occurrence of /dev/stdin */ 269 1.10 jdolecek if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 270 1.1 bjh21 if (stdinflag) 271 1.1 bjh21 warnx("ignoring extra \"%s\" in file list", 272 1.1 bjh21 argv[i]); 273 1.10 jdolecek else 274 1.1 bjh21 stdinflag = 1; 275 1.14 jdolecek 276 1.14 jdolecek /* change to /dev/stdin if '-' */ 277 1.51 dsl if (argv[i][0] == '-') { 278 1.51 dsl static char path_stdin[] = _PATH_STDIN; 279 1.51 dsl argv[i] = path_stdin; 280 1.51 dsl } 281 1.14 jdolecek 282 1.2 bjh21 } else if ((ch = access(argv[i], R_OK))) 283 1.7 thorpej err(2, "%s", argv[i]); 284 1.1 bjh21 } 285 1.51 dsl 286 1.55 dsl if (fldtab[1].icol.num == 0) { 287 1.55 dsl /* No sort key specified */ 288 1.60 christos if (fldtab[0].flags & (I|D|F|N|L)) { 289 1.55 dsl /* Modified - generate a key that covers the line */ 290 1.1 bjh21 fldtab[0].flags &= ~(BI|BT); 291 1.54 dsl setfield("1", &fldtab[++fld_cnt], fldtab->flags); 292 1.55 dsl fldreset(fldtab); 293 1.55 dsl } else { 294 1.55 dsl /* Unmodified, just compare the line */ 295 1.55 dsl SINGL_FLD = 1; 296 1.55 dsl fldtab[0].icol.num = 1; 297 1.1 bjh21 } 298 1.55 dsl } else { 299 1.1 bjh21 fldreset(fldtab); 300 1.1 bjh21 } 301 1.55 dsl 302 1.54 dsl settables(); 303 1.51 dsl 304 1.5 bjh21 if (optind == argc) { 305 1.10 jdolecek static const char * const names[] = { _PATH_STDIN, NULL }; 306 1.5 bjh21 filelist.names = names; 307 1.51 dsl num_input_files = 1; 308 1.51 dsl } else { 309 1.10 jdolecek filelist.names = (const char * const *) &argv[optind]; 310 1.51 dsl num_input_files = argc - optind; 311 1.51 dsl } 312 1.15 jdolecek 313 1.62 kre if (mode == 'c' || mode == 'C') { 314 1.62 kre order(&filelist, fldtab, mode == 'C'); 315 1.1 bjh21 /* NOT REACHED */ 316 1.1 bjh21 } 317 1.56 dsl 318 1.1 bjh21 if (!outpath) { 319 1.43 jdolecek toutpath[0] = '\0'; /* path not used in this case */ 320 1.1 bjh21 outfile = outpath = toutpath; 321 1.24 christos outfp = stdout; 322 1.43 jdolecek } else if (lstat(outpath, &st) == 0 323 1.43 jdolecek && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 324 1.43 jdolecek /* output file exists and isn't character or block device */ 325 1.29 tron struct sigaction act; 326 1.24 christos static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 327 1.24 christos SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 328 1.3 bjh21 int outfd; 329 1.1 bjh21 errno = 0; 330 1.1 bjh21 if (access(outpath, W_OK)) 331 1.1 bjh21 err(2, "%s", outpath); 332 1.24 christos (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 333 1.24 christos outpath); 334 1.25 christos if ((outfd = mkstemp(toutpath)) == -1) 335 1.25 christos err(2, "Cannot create temporary file `%s'", toutpath); 336 1.1 bjh21 (void)atexit(cleanup); 337 1.29 tron act.sa_handler = onsignal; 338 1.29 tron (void) sigemptyset(&act.sa_mask); 339 1.29 tron act.sa_flags = SA_RESTART | SA_RESETHAND; 340 1.1 bjh21 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 341 1.1 bjh21 sigaction(sigtable[i], &act, 0); 342 1.51 dsl outfile = toutpath; 343 1.51 dsl if ((outfp = fdopen(outfd, "w")) == NULL) 344 1.51 dsl err(2, "Cannot open temporary file `%s'", toutpath); 345 1.43 jdolecek } else { 346 1.3 bjh21 outfile = outpath; 347 1.13 jdolecek 348 1.43 jdolecek if ((outfp = fopen(outfile, "w")) == NULL) 349 1.43 jdolecek err(2, "output file %s", outfile); 350 1.43 jdolecek } 351 1.13 jdolecek 352 1.62 kre if (mode == 'm') 353 1.56 dsl fmerge(&filelist, num_input_files, outfp, fldtab); 354 1.56 dsl else 355 1.51 dsl fsort(&filelist, num_input_files, outfp, fldtab); 356 1.13 jdolecek 357 1.1 bjh21 if (outfile != outpath) { 358 1.42 jdolecek if (access(outfile, F_OK)) 359 1.7 thorpej err(2, "%s", outfile); 360 1.44 jdolecek 361 1.44 jdolecek /* 362 1.44 jdolecek * Copy file permissions bits of the original file. 363 1.44 jdolecek * st is initialized above, when we create the 364 1.44 jdolecek * temporary spool file. 365 1.44 jdolecek */ 366 1.44 jdolecek if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 367 1.44 jdolecek err(2, "cannot chmod %s: output left in %s", 368 1.44 jdolecek outpath, outfile); 369 1.44 jdolecek } 370 1.44 jdolecek 371 1.1 bjh21 (void)unlink(outpath); 372 1.1 bjh21 if (link(outfile, outpath)) 373 1.1 bjh21 err(2, "cannot link %s: output left in %s", 374 1.1 bjh21 outpath, outfile); 375 1.1 bjh21 (void)unlink(outfile); 376 1.51 dsl toutpath[0] = 0; 377 1.1 bjh21 } 378 1.1 bjh21 exit(0); 379 1.1 bjh21 } 380 1.1 bjh21 381 1.1 bjh21 static void 382 1.49 dsl onsignal(int sig) 383 1.1 bjh21 { 384 1.1 bjh21 cleanup(); 385 1.1 bjh21 } 386 1.1 bjh21 387 1.1 bjh21 static void 388 1.49 dsl cleanup(void) 389 1.1 bjh21 { 390 1.1 bjh21 if (toutpath[0]) 391 1.1 bjh21 (void)unlink(toutpath); 392 1.1 bjh21 } 393 1.1 bjh21 394 1.1 bjh21 static void 395 1.49 dsl usage(const char *msg) 396 1.1 bjh21 { 397 1.62 kre const char *pn = getprogname(); 398 1.62 kre 399 1.18 soren if (msg != NULL) 400 1.47 christos (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 401 1.41 wiz (void)fprintf(stderr, 402 1.62 kre "usage: %s [-bdfHilmnrSsu] [-k kstart[,kend]] [-o output]" 403 1.62 kre " [-R char] [-T dir]\n", pn); 404 1.41 wiz (void)fprintf(stderr, 405 1.41 wiz " [-t char] [file ...]\n"); 406 1.62 kre (void)fprintf(stderr, 407 1.63 wiz " or: %s -C|-c [-bdfilnru] [-k kstart[,kend]] [-o output]" 408 1.62 kre " [-R char]\n", pn); 409 1.62 kre (void)fprintf(stderr, 410 1.62 kre " [-t char] [file]\n"); 411 1.1 bjh21 exit(2); 412 1.1 bjh21 } 413 1.58 enami 414 1.58 enami RECHEADER * 415 1.58 enami allocrec(RECHEADER *rec, size_t size) 416 1.58 enami { 417 1.58 enami 418 1.58 enami return (erealloc(rec, size + sizeof(long) - 1)); 419 1.58 enami } 420