radix_sort.c revision 1.3 1 1.3 dsl /* $NetBSD: radix_sort.c,v 1.3 2009/09/10 22:02:40 dsl Exp $ */
2 1.1 dsl
3 1.1 dsl /*-
4 1.1 dsl * Copyright (c) 1990, 1993
5 1.1 dsl * The Regents of the University of California. All rights reserved.
6 1.1 dsl *
7 1.1 dsl * This code is derived from software contributed to Berkeley by
8 1.1 dsl * Peter McIlroy and by Dan Bernstein at New York University,
9 1.1 dsl *
10 1.1 dsl * Redistribution and use in source and binary forms, with or without
11 1.1 dsl * modification, are permitted provided that the following conditions
12 1.1 dsl * are met:
13 1.1 dsl * 1. Redistributions of source code must retain the above copyright
14 1.1 dsl * notice, this list of conditions and the following disclaimer.
15 1.1 dsl * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 dsl * notice, this list of conditions and the following disclaimer in the
17 1.1 dsl * documentation and/or other materials provided with the distribution.
18 1.1 dsl * 3. Neither the name of the University nor the names of its contributors
19 1.1 dsl * may be used to endorse or promote products derived from this software
20 1.1 dsl * without specific prior written permission.
21 1.1 dsl *
22 1.1 dsl * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 1.1 dsl * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.1 dsl * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.1 dsl * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 1.1 dsl * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.1 dsl * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.1 dsl * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.1 dsl * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.1 dsl * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.1 dsl * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.1 dsl * SUCH DAMAGE.
33 1.1 dsl */
34 1.1 dsl
35 1.1 dsl #include <sys/cdefs.h>
36 1.1 dsl #if defined(LIBC_SCCS) && !defined(lint)
37 1.1 dsl #if 0
38 1.1 dsl static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95";
39 1.1 dsl #else
40 1.3 dsl __RCSID("$NetBSD: radix_sort.c,v 1.3 2009/09/10 22:02:40 dsl Exp $");
41 1.1 dsl #endif
42 1.1 dsl #endif /* LIBC_SCCS and not lint */
43 1.1 dsl
44 1.1 dsl /*
45 1.1 dsl * 'stable' radix sort initially from libc/stdlib/radixsort.c
46 1.1 dsl */
47 1.1 dsl
48 1.1 dsl #include <sys/types.h>
49 1.1 dsl
50 1.1 dsl #include <assert.h>
51 1.1 dsl #include <errno.h>
52 1.3 dsl #include <util.h>
53 1.1 dsl #include "sort.h"
54 1.1 dsl
55 1.1 dsl typedef struct {
56 1.3 dsl RECHEADER **sa; /* Base of saved area */
57 1.3 dsl int sn; /* Number of entries */
58 1.3 dsl int si; /* index into data for compare */
59 1.1 dsl } stack;
60 1.1 dsl
61 1.3 dsl static void simplesort(RECHEADER **, int, int);
62 1.1 dsl
63 1.1 dsl #define THRESHOLD 20 /* Divert to simplesort(). */
64 1.1 dsl
65 1.2 dsl #define empty(s) (s >= sp)
66 1.2 dsl #define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
67 1.2 dsl #define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
68 1.2 dsl #define swap(a, b, t) t = a, a = b, b = t
69 1.2 dsl
70 1.3 dsl void
71 1.3 dsl radix_sort(RECHEADER **a, RECHEADER **ta, int n)
72 1.1 dsl {
73 1.3 dsl u_int count[256], nc, bmin;
74 1.3 dsl u_int c;
75 1.3 dsl RECHEADER **ak, **tai, **lim;
76 1.3 dsl RECHEADER *hdr;
77 1.3 dsl int stack_size = 512;
78 1.3 dsl stack *s, *sp, *sp0, *sp1, temp;
79 1.3 dsl RECHEADER **top[256];
80 1.3 dsl u_int *cp, bigc;
81 1.3 dsl int data_index = 0;
82 1.3 dsl
83 1.2 dsl if (n < THRESHOLD && !DEBUG('r')) {
84 1.3 dsl simplesort(a, n, 0);
85 1.3 dsl return;
86 1.1 dsl }
87 1.1 dsl
88 1.3 dsl s = emalloc(stack_size * sizeof *s);
89 1.3 dsl memset(&count, 0, sizeof count);
90 1.3 dsl /* Technically 'top' doesn't need zeroing */
91 1.3 dsl memset(&top, 0, sizeof top);
92 1.1 dsl
93 1.1 dsl sp = s;
94 1.3 dsl push(a, n, data_index);
95 1.1 dsl while (!empty(s)) {
96 1.3 dsl pop(a, n, data_index);
97 1.2 dsl if (n < THRESHOLD && !DEBUG('r')) {
98 1.3 dsl simplesort(a, n, data_index);
99 1.1 dsl continue;
100 1.1 dsl }
101 1.1 dsl
102 1.3 dsl /* Count number of times each 'next byte' occurs */
103 1.3 dsl nc = 0;
104 1.3 dsl bmin = 255;
105 1.3 dsl lim = a + n;
106 1.3 dsl for (ak = a, tai = ta; ak < lim; ak++) {
107 1.3 dsl hdr = *ak;
108 1.3 dsl if (data_index >= hdr->keylen) {
109 1.3 dsl /* Short key, copy to start of output */
110 1.3 dsl if (UNIQUE && a != sp->sa)
111 1.3 dsl /* Stop duplicate being written out */
112 1.3 dsl hdr->keylen = -1;
113 1.3 dsl *a++ = hdr;
114 1.3 dsl n--;
115 1.3 dsl continue;
116 1.1 dsl }
117 1.3 dsl /* Save in temp buffer for distribute */
118 1.3 dsl *tai++ = hdr;
119 1.3 dsl c = hdr->data[data_index];
120 1.3 dsl if (++count[c] == 1) {
121 1.3 dsl if (c < bmin)
122 1.3 dsl bmin = c;
123 1.3 dsl nc++;
124 1.1 dsl }
125 1.1 dsl }
126 1.3 dsl /*
127 1.3 dsl * We need save the bounds for each 'next byte' that
128 1.3 dsl * occurs more so we can sort each block.
129 1.3 dsl */
130 1.3 dsl if (sp + nc > s + stack_size) {
131 1.3 dsl stack_size *= 2;
132 1.3 dsl sp1 = erealloc(s, stack_size * sizeof *s);
133 1.3 dsl sp = sp1 + (sp - s);
134 1.3 dsl s = sp1;
135 1.3 dsl }
136 1.1 dsl
137 1.3 dsl /* Minor optimisation to do the largest set last */
138 1.1 dsl sp0 = sp1 = sp;
139 1.1 dsl bigc = 2;
140 1.3 dsl /* Convert 'counts' positions, saving bounds for later sorts */
141 1.3 dsl ak = a;
142 1.1 dsl for (cp = count + bmin; nc > 0; cp++) {
143 1.1 dsl while (*cp == 0)
144 1.1 dsl cp++;
145 1.1 dsl if ((c = *cp) > 1) {
146 1.1 dsl if (c > bigc) {
147 1.1 dsl bigc = c;
148 1.1 dsl sp1 = sp;
149 1.1 dsl }
150 1.3 dsl push(ak, c, data_index+1);
151 1.1 dsl }
152 1.3 dsl ak += c;
153 1.3 dsl top[cp-count] = ak;
154 1.1 dsl *cp = 0; /* Reset count[]. */
155 1.1 dsl nc--;
156 1.1 dsl }
157 1.1 dsl swap(*sp0, *sp1, temp);
158 1.1 dsl
159 1.1 dsl for (ak = ta+n; --ak >= ta;) /* Deal to piles. */
160 1.3 dsl *--top[(*ak)->data[data_index]] = *ak;
161 1.1 dsl }
162 1.2 dsl
163 1.3 dsl free(s);
164 1.1 dsl }
165 1.1 dsl
166 1.3 dsl /* insertion sort, short records are sorted before long ones */
167 1.3 dsl static void
168 1.3 dsl simplesort(RECHEADER **a, int n, int data_index)
169 1.1 dsl {
170 1.3 dsl RECHEADER **ak, **ai;
171 1.3 dsl RECHEADER *akh;
172 1.3 dsl RECHEADER **lim = a + n;
173 1.2 dsl const u_char *s, *t;
174 1.3 dsl int s_len, t_len;
175 1.3 dsl int i;
176 1.3 dsl int r;
177 1.1 dsl
178 1.2 dsl if (n <= 1)
179 1.3 dsl return;
180 1.1 dsl
181 1.2 dsl for (ak = a+1; ak < lim; ak++) {
182 1.3 dsl akh = *ak;
183 1.3 dsl s = akh->data;
184 1.3 dsl s_len = akh->keylen;
185 1.3 dsl for (ai = ak; ;) {
186 1.3 dsl ai--;
187 1.3 dsl t = (*ai)->data;
188 1.3 dsl t_len = (*ai)->keylen;
189 1.3 dsl for (i = data_index; ; i++) {
190 1.3 dsl if (i >= s_len || i >= t_len) {
191 1.3 dsl r = s_len - t_len;
192 1.1 dsl break;
193 1.3 dsl }
194 1.3 dsl r = s[i] - t[i];
195 1.3 dsl if (r != 0)
196 1.3 dsl break;
197 1.3 dsl }
198 1.3 dsl if (r >= 0) {
199 1.3 dsl if (r == 0 && UNIQUE) {
200 1.3 dsl /* Put record below existing */
201 1.3 dsl ai[1] = ai[0];
202 1.3 dsl /* Mark so ignored by output() */
203 1.3 dsl akh->keylen = -1;
204 1.3 dsl } else {
205 1.3 dsl ai++;
206 1.3 dsl }
207 1.1 dsl break;
208 1.2 dsl }
209 1.3 dsl ai[1] = ai[0];
210 1.3 dsl if (ai == a)
211 1.3 dsl break;
212 1.1 dsl }
213 1.3 dsl ai[0] = akh;
214 1.2 dsl }
215 1.1 dsl }
216