qsort.c revision 1.1 1 1.1 cgd /*-
2 1.1 cgd * Copyright (c) 1980, 1983, 1990 The Regents of the University of California.
3 1.1 cgd * All rights reserved.
4 1.1 cgd *
5 1.1 cgd * Redistribution and use in source and binary forms, with or without
6 1.1 cgd * modification, are permitted provided that the following conditions
7 1.1 cgd * are met:
8 1.1 cgd * 1. Redistributions of source code must retain the above copyright
9 1.1 cgd * notice, this list of conditions and the following disclaimer.
10 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
11 1.1 cgd * notice, this list of conditions and the following disclaimer in the
12 1.1 cgd * documentation and/or other materials provided with the distribution.
13 1.1 cgd * 3. All advertising materials mentioning features or use of this software
14 1.1 cgd * must display the following acknowledgement:
15 1.1 cgd * This product includes software developed by the University of
16 1.1 cgd * California, Berkeley and its contributors.
17 1.1 cgd * 4. Neither the name of the University nor the names of its contributors
18 1.1 cgd * may be used to endorse or promote products derived from this software
19 1.1 cgd * without specific prior written permission.
20 1.1 cgd *
21 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 1.1 cgd * SUCH DAMAGE.
32 1.1 cgd */
33 1.1 cgd
34 1.1 cgd #if defined(LIBC_SCCS) && !defined(lint)
35 1.1 cgd static char sccsid[] = "@(#)qsort.c 5.9 (Berkeley) 2/23/91";
36 1.1 cgd #endif /* LIBC_SCCS and not lint */
37 1.1 cgd
38 1.1 cgd #include <sys/types.h>
39 1.1 cgd #include <stdlib.h>
40 1.1 cgd
41 1.1 cgd /*
42 1.1 cgd * MTHRESH is the smallest partition for which we compare for a median
43 1.1 cgd * value instead of using the middle value.
44 1.1 cgd */
45 1.1 cgd #define MTHRESH 6
46 1.1 cgd
47 1.1 cgd /*
48 1.1 cgd * THRESH is the minimum number of entries in a partition for continued
49 1.1 cgd * partitioning.
50 1.1 cgd */
51 1.1 cgd #define THRESH 4
52 1.1 cgd
53 1.1 cgd void
54 1.1 cgd qsort(bot, nmemb, size, compar)
55 1.1 cgd void *bot;
56 1.1 cgd size_t nmemb, size;
57 1.1 cgd int (*compar) __P((const void *, const void *));
58 1.1 cgd {
59 1.1 cgd static void insertion_sort(), quick_sort();
60 1.1 cgd
61 1.1 cgd if (nmemb <= 1)
62 1.1 cgd return;
63 1.1 cgd
64 1.1 cgd if (nmemb >= THRESH)
65 1.1 cgd quick_sort(bot, nmemb, size, compar);
66 1.1 cgd else
67 1.1 cgd insertion_sort(bot, nmemb, size, compar);
68 1.1 cgd }
69 1.1 cgd
70 1.1 cgd /*
71 1.1 cgd * Swap two areas of size number of bytes. Although qsort(3) permits random
72 1.1 cgd * blocks of memory to be sorted, sorting pointers is almost certainly the
73 1.1 cgd * common case (and, were it not, could easily be made so). Regardless, it
74 1.1 cgd * isn't worth optimizing; the SWAP's get sped up by the cache, and pointer
75 1.1 cgd * arithmetic gets lost in the time required for comparison function calls.
76 1.1 cgd */
77 1.1 cgd #define SWAP(a, b) { \
78 1.1 cgd cnt = size; \
79 1.1 cgd do { \
80 1.1 cgd ch = *a; \
81 1.1 cgd *a++ = *b; \
82 1.1 cgd *b++ = ch; \
83 1.1 cgd } while (--cnt); \
84 1.1 cgd }
85 1.1 cgd
86 1.1 cgd /*
87 1.1 cgd * Knuth, Vol. 3, page 116, Algorithm Q, step b, argues that a single pass
88 1.1 cgd * of straight insertion sort after partitioning is complete is better than
89 1.1 cgd * sorting each small partition as it is created. This isn't correct in this
90 1.1 cgd * implementation because comparisons require at least one (and often two)
91 1.1 cgd * function calls and are likely to be the dominating expense of the sort.
92 1.1 cgd * Doing a final insertion sort does more comparisons than are necessary
93 1.1 cgd * because it compares the "edges" and medians of the partitions which are
94 1.1 cgd * known to be already sorted.
95 1.1 cgd *
96 1.1 cgd * This is also the reasoning behind selecting a small THRESH value (see
97 1.1 cgd * Knuth, page 122, equation 26), since the quicksort algorithm does less
98 1.1 cgd * comparisons than the insertion sort.
99 1.1 cgd */
100 1.1 cgd #define SORT(bot, n) { \
101 1.1 cgd if (n > 1) \
102 1.1 cgd if (n == 2) { \
103 1.1 cgd t1 = bot + size; \
104 1.1 cgd if (compar(t1, bot) < 0) \
105 1.1 cgd SWAP(t1, bot); \
106 1.1 cgd } else \
107 1.1 cgd insertion_sort(bot, n, size, compar); \
108 1.1 cgd }
109 1.1 cgd
110 1.1 cgd static void
111 1.1 cgd quick_sort(bot, nmemb, size, compar)
112 1.1 cgd register char *bot;
113 1.1 cgd register int size;
114 1.1 cgd int nmemb, (*compar)();
115 1.1 cgd {
116 1.1 cgd register int cnt;
117 1.1 cgd register u_char ch;
118 1.1 cgd register char *top, *mid, *t1, *t2;
119 1.1 cgd register int n1, n2;
120 1.1 cgd char *bsv;
121 1.1 cgd static void insertion_sort();
122 1.1 cgd
123 1.1 cgd /* bot and nmemb must already be set. */
124 1.1 cgd partition:
125 1.1 cgd
126 1.1 cgd /* find mid and top elements */
127 1.1 cgd mid = bot + size * (nmemb >> 1);
128 1.1 cgd top = bot + (nmemb - 1) * size;
129 1.1 cgd
130 1.1 cgd /*
131 1.1 cgd * Find the median of the first, last and middle element (see Knuth,
132 1.1 cgd * Vol. 3, page 123, Eq. 28). This test order gets the equalities
133 1.1 cgd * right.
134 1.1 cgd */
135 1.1 cgd if (nmemb >= MTHRESH) {
136 1.1 cgd n1 = compar(bot, mid);
137 1.1 cgd n2 = compar(mid, top);
138 1.1 cgd if (n1 < 0 && n2 > 0)
139 1.1 cgd t1 = compar(bot, top) < 0 ? top : bot;
140 1.1 cgd else if (n1 > 0 && n2 < 0)
141 1.1 cgd t1 = compar(bot, top) > 0 ? top : bot;
142 1.1 cgd else
143 1.1 cgd t1 = mid;
144 1.1 cgd
145 1.1 cgd /* if mid element not selected, swap selection there */
146 1.1 cgd if (t1 != mid) {
147 1.1 cgd SWAP(t1, mid);
148 1.1 cgd mid -= size;
149 1.1 cgd }
150 1.1 cgd }
151 1.1 cgd
152 1.1 cgd /* Standard quicksort, Knuth, Vol. 3, page 116, Algorithm Q. */
153 1.1 cgd #define didswap n1
154 1.1 cgd #define newbot t1
155 1.1 cgd #define replace t2
156 1.1 cgd didswap = 0;
157 1.1 cgd for (bsv = bot;;) {
158 1.1 cgd for (; bot < mid && compar(bot, mid) <= 0; bot += size);
159 1.1 cgd while (top > mid) {
160 1.1 cgd if (compar(mid, top) <= 0) {
161 1.1 cgd top -= size;
162 1.1 cgd continue;
163 1.1 cgd }
164 1.1 cgd newbot = bot + size; /* value of bot after swap */
165 1.1 cgd if (bot == mid) /* top <-> mid, mid == top */
166 1.1 cgd replace = mid = top;
167 1.1 cgd else { /* bot <-> top */
168 1.1 cgd replace = top;
169 1.1 cgd top -= size;
170 1.1 cgd }
171 1.1 cgd goto swap;
172 1.1 cgd }
173 1.1 cgd if (bot == mid)
174 1.1 cgd break;
175 1.1 cgd
176 1.1 cgd /* bot <-> mid, mid == bot */
177 1.1 cgd replace = mid;
178 1.1 cgd newbot = mid = bot; /* value of bot after swap */
179 1.1 cgd top -= size;
180 1.1 cgd
181 1.1 cgd swap: SWAP(bot, replace);
182 1.1 cgd bot = newbot;
183 1.1 cgd didswap = 1;
184 1.1 cgd }
185 1.1 cgd
186 1.1 cgd /*
187 1.1 cgd * Quicksort behaves badly in the presence of data which is already
188 1.1 cgd * sorted (see Knuth, Vol. 3, page 119) going from O N lg N to O N^2.
189 1.1 cgd * To avoid this worst case behavior, if a re-partitioning occurs
190 1.1 cgd * without swapping any elements, it is not further partitioned and
191 1.1 cgd * is insert sorted. This wins big with almost sorted data sets and
192 1.1 cgd * only loses if the data set is very strangely partitioned. A fix
193 1.1 cgd * for those data sets would be to return prematurely if the insertion
194 1.1 cgd * sort routine is forced to make an excessive number of swaps, and
195 1.1 cgd * continue the partitioning.
196 1.1 cgd */
197 1.1 cgd if (!didswap) {
198 1.1 cgd insertion_sort(bsv, nmemb, size, compar);
199 1.1 cgd return;
200 1.1 cgd }
201 1.1 cgd
202 1.1 cgd /*
203 1.1 cgd * Re-partition or sort as necessary. Note that the mid element
204 1.1 cgd * itself is correctly positioned and can be ignored.
205 1.1 cgd */
206 1.1 cgd #define nlower n1
207 1.1 cgd #define nupper n2
208 1.1 cgd bot = bsv;
209 1.1 cgd nlower = (mid - bot) / size; /* size of lower partition */
210 1.1 cgd mid += size;
211 1.1 cgd nupper = nmemb - nlower - 1; /* size of upper partition */
212 1.1 cgd
213 1.1 cgd /*
214 1.1 cgd * If must call recursively, do it on the smaller partition; this
215 1.1 cgd * bounds the stack to lg N entries.
216 1.1 cgd */
217 1.1 cgd if (nlower > nupper) {
218 1.1 cgd if (nupper >= THRESH)
219 1.1 cgd quick_sort(mid, nupper, size, compar);
220 1.1 cgd else {
221 1.1 cgd SORT(mid, nupper);
222 1.1 cgd if (nlower < THRESH) {
223 1.1 cgd SORT(bot, nlower);
224 1.1 cgd return;
225 1.1 cgd }
226 1.1 cgd }
227 1.1 cgd nmemb = nlower;
228 1.1 cgd } else {
229 1.1 cgd if (nlower >= THRESH)
230 1.1 cgd quick_sort(bot, nlower, size, compar);
231 1.1 cgd else {
232 1.1 cgd SORT(bot, nlower);
233 1.1 cgd if (nupper < THRESH) {
234 1.1 cgd SORT(mid, nupper);
235 1.1 cgd return;
236 1.1 cgd }
237 1.1 cgd }
238 1.1 cgd bot = mid;
239 1.1 cgd nmemb = nupper;
240 1.1 cgd }
241 1.1 cgd goto partition;
242 1.1 cgd /* NOTREACHED */
243 1.1 cgd }
244 1.1 cgd
245 1.1 cgd static void
246 1.1 cgd insertion_sort(bot, nmemb, size, compar)
247 1.1 cgd char *bot;
248 1.1 cgd register int size;
249 1.1 cgd int nmemb, (*compar)();
250 1.1 cgd {
251 1.1 cgd register int cnt;
252 1.1 cgd register u_char ch;
253 1.1 cgd register char *s1, *s2, *t1, *t2, *top;
254 1.1 cgd
255 1.1 cgd /*
256 1.1 cgd * A simple insertion sort (see Knuth, Vol. 3, page 81, Algorithm
257 1.1 cgd * S). Insertion sort has the same worst case as most simple sorts
258 1.1 cgd * (O N^2). It gets used here because it is (O N) in the case of
259 1.1 cgd * sorted data.
260 1.1 cgd */
261 1.1 cgd top = bot + nmemb * size;
262 1.1 cgd for (t1 = bot + size; t1 < top;) {
263 1.1 cgd for (t2 = t1; (t2 -= size) >= bot && compar(t1, t2) < 0;);
264 1.1 cgd if (t1 != (t2 += size)) {
265 1.1 cgd /* Bubble bytes up through each element. */
266 1.1 cgd for (cnt = size; cnt--; ++t1) {
267 1.1 cgd ch = *t1;
268 1.1 cgd for (s1 = s2 = t1; (s2 -= size) >= t2; s1 = s2)
269 1.1 cgd *s1 = *s2;
270 1.1 cgd *s1 = ch;
271 1.1 cgd }
272 1.1 cgd } else
273 1.1 cgd t1 += size;
274 1.1 cgd }
275 1.1 cgd }
276