1 1.36 kre /* $NetBSD: sort.h,v 1.36 2016/06/01 02:37:55 kre Exp $ */ 2 1.17 jdolecek 3 1.17 jdolecek /*- 4 1.17 jdolecek * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 1.17 jdolecek * All rights reserved. 6 1.17 jdolecek * 7 1.17 jdolecek * This code is derived from software contributed to The NetBSD Foundation 8 1.17 jdolecek * by Ben Harris and Jaromir Dolecek. 9 1.17 jdolecek * 10 1.17 jdolecek * Redistribution and use in source and binary forms, with or without 11 1.17 jdolecek * modification, are permitted provided that the following conditions 12 1.17 jdolecek * are met: 13 1.17 jdolecek * 1. Redistributions of source code must retain the above copyright 14 1.17 jdolecek * notice, this list of conditions and the following disclaimer. 15 1.17 jdolecek * 2. Redistributions in binary form must reproduce the above copyright 16 1.17 jdolecek * notice, this list of conditions and the following disclaimer in the 17 1.17 jdolecek * documentation and/or other materials provided with the distribution. 18 1.17 jdolecek * 19 1.17 jdolecek * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.17 jdolecek * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.17 jdolecek * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.17 jdolecek * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.17 jdolecek * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.17 jdolecek * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.17 jdolecek * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.17 jdolecek * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.17 jdolecek * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.17 jdolecek * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.17 jdolecek * POSSIBILITY OF SUCH DAMAGE. 30 1.17 jdolecek */ 31 1.2 bjh21 32 1.1 bjh21 /*- 33 1.1 bjh21 * Copyright (c) 1993 34 1.1 bjh21 * The Regents of the University of California. All rights reserved. 35 1.1 bjh21 * 36 1.1 bjh21 * This code is derived from software contributed to Berkeley by 37 1.1 bjh21 * Peter McIlroy. 38 1.1 bjh21 * 39 1.1 bjh21 * Redistribution and use in source and binary forms, with or without 40 1.1 bjh21 * modification, are permitted provided that the following conditions 41 1.1 bjh21 * are met: 42 1.1 bjh21 * 1. Redistributions of source code must retain the above copyright 43 1.1 bjh21 * notice, this list of conditions and the following disclaimer. 44 1.1 bjh21 * 2. Redistributions in binary form must reproduce the above copyright 45 1.1 bjh21 * notice, this list of conditions and the following disclaimer in the 46 1.1 bjh21 * documentation and/or other materials provided with the distribution. 47 1.16 agc * 3. Neither the name of the University nor the names of its contributors 48 1.1 bjh21 * may be used to endorse or promote products derived from this software 49 1.1 bjh21 * without specific prior written permission. 50 1.1 bjh21 * 51 1.1 bjh21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 1.1 bjh21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 1.1 bjh21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 1.1 bjh21 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 1.1 bjh21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 1.1 bjh21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 1.1 bjh21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 1.1 bjh21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 1.1 bjh21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 1.1 bjh21 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 1.1 bjh21 * SUCH DAMAGE. 62 1.1 bjh21 * 63 1.1 bjh21 * @(#)sort.h 8.1 (Berkeley) 6/6/93 64 1.1 bjh21 */ 65 1.1 bjh21 66 1.1 bjh21 #include <sys/param.h> 67 1.1 bjh21 68 1.1 bjh21 #include <err.h> 69 1.1 bjh21 #include <errno.h> 70 1.1 bjh21 #include <fcntl.h> 71 1.1 bjh21 #include <limits.h> 72 1.20 lukem #include <stddef.h> 73 1.1 bjh21 #include <stdio.h> 74 1.1 bjh21 #include <stdlib.h> 75 1.4 simonb #include <string.h> 76 1.1 bjh21 77 1.11 jdolecek #define NBINS 256 78 1.1 bjh21 79 1.1 bjh21 /* values for masks, weights, and other flags. */ 80 1.25 dsl /* R and F get used to index weight_tables[] */ 81 1.33 christos #define R 0x01 /* Field is reversed */ 82 1.33 christos #define F 0x02 /* weight lower and upper case the same */ 83 1.33 christos #define I 0x04 /* mask out non-printable characters */ 84 1.33 christos #define D 0x08 /* sort alphanumeric characters only */ 85 1.33 christos #define N 0x10 /* Field is a number */ 86 1.33 christos #define BI 0x20 /* ignore blanks in icol */ 87 1.33 christos #define BT 0x40 /* ignore blanks in tcol */ 88 1.33 christos #define L 0x80 /* Sort by field length */ 89 1.1 bjh21 90 1.1 bjh21 /* masks for delimiters: blanks, fields, and termination. */ 91 1.25 dsl #define BLANK 1 /* ' ', '\t'; '\n' if -R is invoked */ 92 1.1 bjh21 #define FLD_D 2 /* ' ', '\t' default; from -t otherwise */ 93 1.25 dsl #define REC_D_F 4 /* '\n' default; from -R otherwise */ 94 1.1 bjh21 95 1.1 bjh21 #define min(a, b) ((a) < (b) ? (a) : (b)) 96 1.1 bjh21 #define max(a, b) ((a) > (b) ? (a) : (b)) 97 1.1 bjh21 98 1.1 bjh21 #define FCLOSE(file) { \ 99 1.1 bjh21 if (EOF == fclose(file)) \ 100 1.2 bjh21 err(2, "%p", file); \ 101 1.1 bjh21 } 102 1.1 bjh21 103 1.35 mrg #define EWRITE(ptr, size, n, f, fmt) { \ 104 1.1 bjh21 if (!fwrite(ptr, size, n, f)) \ 105 1.35 mrg err(2, fmt); \ 106 1.1 bjh21 } 107 1.1 bjh21 108 1.28 dsl /* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort 109 1.28 dsl * in a sane way. 110 1.28 dsl * Anyone who wants to sort data records longer than 2GB definitely needs a 111 1.28 dsl * different program! */ 112 1.28 dsl typedef unsigned int length_t; 113 1.1 bjh21 114 1.22 dsl /* A record is a key/line pair starting at rec.data. It has a total length 115 1.1 bjh21 * and an offset to the start of the line half of the pair. 116 1.1 bjh21 */ 117 1.1 bjh21 typedef struct recheader { 118 1.28 dsl length_t length; /* total length of key and line */ 119 1.28 dsl length_t offset; /* to line */ 120 1.28 dsl int keylen; /* length of key */ 121 1.28 dsl u_char data[]; /* key then line */ 122 1.1 bjh21 } RECHEADER; 123 1.1 bjh21 124 1.1 bjh21 /* This is the column as seen by struct field. It is used by enterfield. 125 1.1 bjh21 * They are matched with corresponding coldescs during initialization. 126 1.1 bjh21 */ 127 1.1 bjh21 struct column { 128 1.1 bjh21 struct coldesc *p; 129 1.1 bjh21 int num; 130 1.1 bjh21 int indent; 131 1.1 bjh21 }; 132 1.1 bjh21 133 1.1 bjh21 /* a coldesc has a number and pointers to the beginning and end of the 134 1.1 bjh21 * corresponding column in the current line. This is determined in enterkey. 135 1.1 bjh21 */ 136 1.1 bjh21 typedef struct coldesc { 137 1.1 bjh21 u_char *start; 138 1.1 bjh21 u_char *end; 139 1.1 bjh21 int num; 140 1.1 bjh21 } COLDESC; 141 1.1 bjh21 142 1.1 bjh21 /* A field has an initial and final column; an omitted final column 143 1.1 bjh21 * implies the end of the line. Flags regulate omission of blanks and 144 1.1 bjh21 * numerical sorts; mask determines which characters are ignored (from -i, -d); 145 1.1 bjh21 * weights determines the sort weights of a character (from -f, -r). 146 1.25 dsl * 147 1.25 dsl * The first field contain the global flags etc. 148 1.25 dsl * The list terminates when icol = 0. 149 1.1 bjh21 */ 150 1.1 bjh21 struct field { 151 1.1 bjh21 struct column icol; 152 1.1 bjh21 struct column tcol; 153 1.1 bjh21 u_int flags; 154 1.1 bjh21 u_char *mask; 155 1.1 bjh21 u_char *weights; 156 1.1 bjh21 }; 157 1.1 bjh21 158 1.8 jdolecek struct filelist { 159 1.7 jdolecek const char * const * names; 160 1.1 bjh21 }; 161 1.8 jdolecek 162 1.29 dsl typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *); 163 1.27 dsl typedef void (*put_func_t)(const RECHEADER *, FILE *); 164 1.8 jdolecek 165 1.1 bjh21 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; 166 1.25 dsl extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */ 167 1.1 bjh21 extern u_char d_mask[NBINS]; 168 1.28 dsl extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE; 169 1.28 dsl extern int posix_sort; 170 1.1 bjh21 extern int REC_D; 171 1.5 jdolecek extern const char *tmpdir; 172 1.18 jdolecek extern struct coldesc *clist; 173 1.13 jdolecek extern int ncols; 174 1.1 bjh21 175 1.25 dsl #define DEBUG(ch) (debug_flags & (1 << ((ch) & 31))) 176 1.25 dsl extern unsigned int debug_flags; 177 1.25 dsl 178 1.31 enami RECHEADER *allocrec(RECHEADER *, size_t); 179 1.28 dsl void append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *)); 180 1.14 jdolecek void concat(FILE *, FILE *); 181 1.21 dsl length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *); 182 1.32 dholland void fixit(int *, char **, const char *); 183 1.14 jdolecek void fldreset(struct field *); 184 1.14 jdolecek FILE *ftmp(void); 185 1.29 dsl void fmerge(struct filelist *, int, FILE *, struct field *); 186 1.29 dsl void save_for_merge(FILE *, get_func_t, struct field *); 187 1.29 dsl void merge_sort(FILE *, put_func_t, struct field *); 188 1.24 dsl void fsort(struct filelist *, int, FILE *, struct field *); 189 1.29 dsl int geteasy(FILE *, RECHEADER *, u_char *, struct field *); 190 1.29 dsl int makekey(FILE *, RECHEADER *, u_char *, struct field *); 191 1.29 dsl int makeline(FILE *, RECHEADER *, u_char *, struct field *); 192 1.30 dsl void makeline_copydown(RECHEADER *); 193 1.14 jdolecek int optval(int, int); 194 1.36 kre __dead void order(struct filelist *, struct field *, int); 195 1.14 jdolecek void putline(const RECHEADER *, FILE *); 196 1.14 jdolecek void putrec(const RECHEADER *, FILE *); 197 1.25 dsl void putkeydump(const RECHEADER *, FILE *); 198 1.14 jdolecek void rd_append(int, int, int, FILE *, u_char *, u_char *); 199 1.28 dsl void radix_sort(RECHEADER **, RECHEADER **, int); 200 1.14 jdolecek int setfield(const char *, struct field *, int); 201 1.26 dsl void settables(void); 202