1 1.4 dsl /* $NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $ */ 2 1.1 dsl 3 1.1 dsl /*- 4 1.1 dsl * Copyright (c) 1990, 1993 5 1.1 dsl * The Regents of the University of California. All rights reserved. 6 1.1 dsl * 7 1.1 dsl * This code is derived from software contributed to Berkeley by 8 1.1 dsl * Peter McIlroy and by Dan Bernstein at New York University, 9 1.1 dsl * 10 1.1 dsl * Redistribution and use in source and binary forms, with or without 11 1.1 dsl * modification, are permitted provided that the following conditions 12 1.1 dsl * are met: 13 1.1 dsl * 1. Redistributions of source code must retain the above copyright 14 1.1 dsl * notice, this list of conditions and the following disclaimer. 15 1.1 dsl * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 dsl * notice, this list of conditions and the following disclaimer in the 17 1.1 dsl * documentation and/or other materials provided with the distribution. 18 1.1 dsl * 3. Neither the name of the University nor the names of its contributors 19 1.1 dsl * may be used to endorse or promote products derived from this software 20 1.1 dsl * without specific prior written permission. 21 1.1 dsl * 22 1.1 dsl * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 dsl * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 dsl * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 dsl * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 dsl * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 dsl * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 dsl * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 dsl * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 dsl * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 dsl * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 dsl * SUCH DAMAGE. 33 1.1 dsl */ 34 1.1 dsl 35 1.1 dsl #include <sys/cdefs.h> 36 1.1 dsl #if defined(LIBC_SCCS) && !defined(lint) 37 1.1 dsl #if 0 38 1.1 dsl static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; 39 1.1 dsl #else 40 1.4 dsl __RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $"); 41 1.1 dsl #endif 42 1.1 dsl #endif /* LIBC_SCCS and not lint */ 43 1.1 dsl 44 1.1 dsl /* 45 1.1 dsl * 'stable' radix sort initially from libc/stdlib/radixsort.c 46 1.1 dsl */ 47 1.1 dsl 48 1.1 dsl #include <sys/types.h> 49 1.1 dsl 50 1.1 dsl #include <assert.h> 51 1.1 dsl #include <errno.h> 52 1.3 dsl #include <util.h> 53 1.1 dsl #include "sort.h" 54 1.1 dsl 55 1.1 dsl typedef struct { 56 1.3 dsl RECHEADER **sa; /* Base of saved area */ 57 1.3 dsl int sn; /* Number of entries */ 58 1.3 dsl int si; /* index into data for compare */ 59 1.1 dsl } stack; 60 1.1 dsl 61 1.3 dsl static void simplesort(RECHEADER **, int, int); 62 1.1 dsl 63 1.1 dsl #define THRESHOLD 20 /* Divert to simplesort(). */ 64 1.1 dsl 65 1.2 dsl #define empty(s) (s >= sp) 66 1.2 dsl #define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si 67 1.2 dsl #define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i 68 1.2 dsl #define swap(a, b, t) t = a, a = b, b = t 69 1.2 dsl 70 1.3 dsl void 71 1.3 dsl radix_sort(RECHEADER **a, RECHEADER **ta, int n) 72 1.1 dsl { 73 1.3 dsl u_int count[256], nc, bmin; 74 1.3 dsl u_int c; 75 1.3 dsl RECHEADER **ak, **tai, **lim; 76 1.3 dsl RECHEADER *hdr; 77 1.3 dsl int stack_size = 512; 78 1.3 dsl stack *s, *sp, *sp0, *sp1, temp; 79 1.3 dsl RECHEADER **top[256]; 80 1.3 dsl u_int *cp, bigc; 81 1.3 dsl int data_index = 0; 82 1.3 dsl 83 1.2 dsl if (n < THRESHOLD && !DEBUG('r')) { 84 1.3 dsl simplesort(a, n, 0); 85 1.3 dsl return; 86 1.1 dsl } 87 1.1 dsl 88 1.3 dsl s = emalloc(stack_size * sizeof *s); 89 1.3 dsl memset(&count, 0, sizeof count); 90 1.3 dsl /* Technically 'top' doesn't need zeroing */ 91 1.3 dsl memset(&top, 0, sizeof top); 92 1.1 dsl 93 1.1 dsl sp = s; 94 1.3 dsl push(a, n, data_index); 95 1.1 dsl while (!empty(s)) { 96 1.3 dsl pop(a, n, data_index); 97 1.2 dsl if (n < THRESHOLD && !DEBUG('r')) { 98 1.3 dsl simplesort(a, n, data_index); 99 1.1 dsl continue; 100 1.1 dsl } 101 1.1 dsl 102 1.3 dsl /* Count number of times each 'next byte' occurs */ 103 1.3 dsl nc = 0; 104 1.3 dsl bmin = 255; 105 1.3 dsl lim = a + n; 106 1.3 dsl for (ak = a, tai = ta; ak < lim; ak++) { 107 1.3 dsl hdr = *ak; 108 1.3 dsl if (data_index >= hdr->keylen) { 109 1.3 dsl /* Short key, copy to start of output */ 110 1.3 dsl if (UNIQUE && a != sp->sa) 111 1.3 dsl /* Stop duplicate being written out */ 112 1.3 dsl hdr->keylen = -1; 113 1.3 dsl *a++ = hdr; 114 1.3 dsl n--; 115 1.3 dsl continue; 116 1.1 dsl } 117 1.3 dsl /* Save in temp buffer for distribute */ 118 1.3 dsl *tai++ = hdr; 119 1.3 dsl c = hdr->data[data_index]; 120 1.3 dsl if (++count[c] == 1) { 121 1.3 dsl if (c < bmin) 122 1.3 dsl bmin = c; 123 1.3 dsl nc++; 124 1.1 dsl } 125 1.1 dsl } 126 1.3 dsl /* 127 1.3 dsl * We need save the bounds for each 'next byte' that 128 1.3 dsl * occurs more so we can sort each block. 129 1.3 dsl */ 130 1.3 dsl if (sp + nc > s + stack_size) { 131 1.3 dsl stack_size *= 2; 132 1.3 dsl sp1 = erealloc(s, stack_size * sizeof *s); 133 1.3 dsl sp = sp1 + (sp - s); 134 1.3 dsl s = sp1; 135 1.3 dsl } 136 1.1 dsl 137 1.3 dsl /* Minor optimisation to do the largest set last */ 138 1.1 dsl sp0 = sp1 = sp; 139 1.1 dsl bigc = 2; 140 1.3 dsl /* Convert 'counts' positions, saving bounds for later sorts */ 141 1.3 dsl ak = a; 142 1.1 dsl for (cp = count + bmin; nc > 0; cp++) { 143 1.1 dsl while (*cp == 0) 144 1.1 dsl cp++; 145 1.1 dsl if ((c = *cp) > 1) { 146 1.1 dsl if (c > bigc) { 147 1.1 dsl bigc = c; 148 1.1 dsl sp1 = sp; 149 1.1 dsl } 150 1.3 dsl push(ak, c, data_index+1); 151 1.1 dsl } 152 1.3 dsl ak += c; 153 1.3 dsl top[cp-count] = ak; 154 1.1 dsl *cp = 0; /* Reset count[]. */ 155 1.1 dsl nc--; 156 1.1 dsl } 157 1.1 dsl swap(*sp0, *sp1, temp); 158 1.1 dsl 159 1.1 dsl for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ 160 1.3 dsl *--top[(*ak)->data[data_index]] = *ak; 161 1.1 dsl } 162 1.2 dsl 163 1.3 dsl free(s); 164 1.1 dsl } 165 1.1 dsl 166 1.3 dsl /* insertion sort, short records are sorted before long ones */ 167 1.3 dsl static void 168 1.3 dsl simplesort(RECHEADER **a, int n, int data_index) 169 1.1 dsl { 170 1.3 dsl RECHEADER **ak, **ai; 171 1.3 dsl RECHEADER *akh; 172 1.3 dsl RECHEADER **lim = a + n; 173 1.2 dsl const u_char *s, *t; 174 1.3 dsl int s_len, t_len; 175 1.3 dsl int i; 176 1.3 dsl int r; 177 1.1 dsl 178 1.2 dsl if (n <= 1) 179 1.3 dsl return; 180 1.1 dsl 181 1.2 dsl for (ak = a+1; ak < lim; ak++) { 182 1.3 dsl akh = *ak; 183 1.3 dsl s = akh->data; 184 1.3 dsl s_len = akh->keylen; 185 1.3 dsl for (ai = ak; ;) { 186 1.3 dsl ai--; 187 1.3 dsl t_len = (*ai)->keylen; 188 1.4 dsl if (t_len != -1) { 189 1.4 dsl t = (*ai)->data; 190 1.4 dsl for (i = data_index; ; i++) { 191 1.4 dsl if (i >= s_len || i >= t_len) { 192 1.4 dsl r = s_len - t_len; 193 1.4 dsl break; 194 1.4 dsl } 195 1.4 dsl r = s[i] - t[i]; 196 1.4 dsl if (r != 0) 197 1.4 dsl break; 198 1.3 dsl } 199 1.4 dsl if (r >= 0) { 200 1.4 dsl if (r == 0 && UNIQUE) { 201 1.4 dsl /* Put record below existing */ 202 1.4 dsl ai[1] = ai[0]; 203 1.4 dsl /* Mark as duplicate - ignore */ 204 1.4 dsl akh->keylen = -1; 205 1.4 dsl } else { 206 1.4 dsl ai++; 207 1.4 dsl } 208 1.3 dsl break; 209 1.3 dsl } 210 1.2 dsl } 211 1.3 dsl ai[1] = ai[0]; 212 1.3 dsl if (ai == a) 213 1.3 dsl break; 214 1.1 dsl } 215 1.3 dsl ai[0] = akh; 216 1.2 dsl } 217 1.1 dsl } 218