Home | History | Annotate | Line # | Download | only in sort
      1 /*	$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1990, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Peter McIlroy and by Dan Bernstein at New York University,
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. Neither the name of the University nor the names of its contributors
     19  *    may be used to endorse or promote products derived from this software
     20  *    without specific prior written permission.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32  * SUCH DAMAGE.
     33  */
     34 
     35 #include <sys/cdefs.h>
     36 #if defined(LIBC_SCCS) && !defined(lint)
     37 #if 0
     38 static char sccsid[] = "@(#)radixsort.c	8.2 (Berkeley) 4/28/95";
     39 #else
     40 __RCSID("$NetBSD: radix_sort.c,v 1.4 2009/09/19 16:18:00 dsl Exp $");
     41 #endif
     42 #endif /* LIBC_SCCS and not lint */
     43 
     44 /*
     45  * 'stable' radix sort initially from libc/stdlib/radixsort.c
     46  */
     47 
     48 #include <sys/types.h>
     49 
     50 #include <assert.h>
     51 #include <errno.h>
     52 #include <util.h>
     53 #include "sort.h"
     54 
     55 typedef struct {
     56 	RECHEADER **sa;		/* Base of saved area */
     57 	int sn;			/* Number of entries */
     58 	int si;			/* index into data for compare */
     59 } stack;
     60 
     61 static void simplesort(RECHEADER **, int, int);
     62 
     63 #define	THRESHOLD	20		/* Divert to simplesort(). */
     64 
     65 #define empty(s)	(s >= sp)
     66 #define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
     67 #define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
     68 #define swap(a, b, t)	t = a, a = b, b = t
     69 
     70 void
     71 radix_sort(RECHEADER **a, RECHEADER **ta, int n)
     72 {
     73 	u_int count[256], nc, bmin;
     74 	u_int c;
     75 	RECHEADER **ak, **tai, **lim;
     76 	RECHEADER *hdr;
     77 	int stack_size = 512;
     78 	stack *s, *sp, *sp0, *sp1, temp;
     79 	RECHEADER **top[256];
     80 	u_int *cp, bigc;
     81 	int data_index = 0;
     82 
     83 	if (n < THRESHOLD && !DEBUG('r')) {
     84 		simplesort(a, n, 0);
     85 		return;
     86 	}
     87 
     88 	s = emalloc(stack_size * sizeof *s);
     89 	memset(&count, 0, sizeof count);
     90 	/* Technically 'top' doesn't need zeroing */
     91 	memset(&top, 0, sizeof top);
     92 
     93 	sp = s;
     94 	push(a, n, data_index);
     95 	while (!empty(s)) {
     96 		pop(a, n, data_index);
     97 		if (n < THRESHOLD && !DEBUG('r')) {
     98 			simplesort(a, n, data_index);
     99 			continue;
    100 		}
    101 
    102 		/* Count number of times each 'next byte' occurs */
    103 		nc = 0;
    104 		bmin = 255;
    105 		lim = a + n;
    106 		for (ak = a, tai = ta; ak < lim; ak++) {
    107 			hdr = *ak;
    108 			if (data_index >= hdr->keylen) {
    109 				/* Short key, copy to start of output */
    110 				if (UNIQUE && a != sp->sa)
    111 					/* Stop duplicate being written out */
    112 					hdr->keylen = -1;
    113 				*a++ = hdr;
    114 				n--;
    115 				continue;
    116 			}
    117 			/* Save in temp buffer for distribute */
    118 			*tai++ = hdr;
    119 			c = hdr->data[data_index];
    120 			if (++count[c] == 1) {
    121 				if (c < bmin)
    122 					bmin = c;
    123 				nc++;
    124 			}
    125 		}
    126 		/*
    127 		 * We need save the bounds for each 'next byte' that
    128 		 * occurs more so we can sort each block.
    129 		 */
    130 		if (sp + nc > s + stack_size) {
    131 			stack_size *= 2;
    132 			sp1 = erealloc(s, stack_size * sizeof *s);
    133 			sp = sp1 + (sp - s);
    134 			s = sp1;
    135 		}
    136 
    137 		/* Minor optimisation to do the largest set last */
    138 		sp0 = sp1 = sp;
    139 		bigc = 2;
    140 		/* Convert 'counts' positions, saving bounds for later sorts */
    141 		ak = a;
    142 		for (cp = count + bmin; nc > 0; cp++) {
    143 			while (*cp == 0)
    144 				cp++;
    145 			if ((c = *cp) > 1) {
    146 				if (c > bigc) {
    147 					bigc = c;
    148 					sp1 = sp;
    149 				}
    150 				push(ak, c, data_index+1);
    151 			}
    152 			ak += c;
    153 			top[cp-count] = ak;
    154 			*cp = 0;			/* Reset count[]. */
    155 			nc--;
    156 		}
    157 		swap(*sp0, *sp1, temp);
    158 
    159 		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
    160 			*--top[(*ak)->data[data_index]] = *ak;
    161 	}
    162 
    163 	free(s);
    164 }
    165 
    166 /* insertion sort, short records are sorted before long ones */
    167 static void
    168 simplesort(RECHEADER **a, int n, int data_index)
    169 {
    170 	RECHEADER **ak, **ai;
    171 	RECHEADER *akh;
    172 	RECHEADER **lim = a + n;
    173 	const u_char *s, *t;
    174 	int s_len, t_len;
    175 	int i;
    176 	int r;
    177 
    178 	if (n <= 1)
    179 		return;
    180 
    181 	for (ak = a+1; ak < lim; ak++) {
    182 		akh = *ak;
    183 		s = akh->data;
    184 		s_len = akh->keylen;
    185 		for (ai = ak; ;) {
    186 			ai--;
    187 			t_len = (*ai)->keylen;
    188 			if (t_len != -1) {
    189 				t = (*ai)->data;
    190 				for (i = data_index; ; i++) {
    191 					if (i >= s_len || i >= t_len) {
    192 						r = s_len - t_len;
    193 						break;
    194 					}
    195 					r =  s[i]  - t[i];
    196 					if (r != 0)
    197 						break;
    198 				}
    199 				if (r >= 0) {
    200 					if (r == 0 && UNIQUE) {
    201 						/* Put record below existing */
    202 						ai[1] = ai[0];
    203 						/* Mark as duplicate - ignore */
    204 						akh->keylen = -1;
    205 					} else {
    206 						ai++;
    207 					}
    208 					break;
    209 				}
    210 			}
    211 			ai[1] = ai[0];
    212 			if (ai == a)
    213 				break;
    214 		}
    215 		ai[0] = akh;
    216 	}
    217 }
    218