Home | History | Annotate | Line # | Download | only in dist
isl_sort.c revision 1.1
      1 /*
      2  * The code of this file was taken from http://jeffreystedfast.blogspot.be,
      3  * where it was posted in 2011 by Jeffrey Stedfast under the MIT license.
      4  * The MIT license text is as follows:
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a copy
      7  * of this software and associated documentation files (the "Software"), to
      8  * deal in the Software without restriction, including without limitation the
      9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
     10  * sell copies of the Software, and to permit persons to whom the Software is
     11  * furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included in
     14  * all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22  * IN THE SOFTWARE.
     23  */
     24 
     25 #include <errno.h>
     26 #include <string.h>
     27 #include <stdlib.h>
     28 #include <isl_sort.h>
     29 
     30 #define MID(lo, hi) (lo + ((hi - lo) >> 1))
     31 
     32 /* The code here is an optimized merge sort. Starting from a generic merge sort
     33  * the following optimizations were applied:
     34  *
     35  * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and
     36  *   every element into a temporary buffer, blocks of elements are copied
     37  *   at a time.
     38  *
     39  * o To reduce the number of memcpy() calls further, copying leading
     40  *   and trailing elements into our temporary buffer is avoided, in case it is
     41  *   not necessary to merge them.
     42  *
     43  * A further optimization could be to specialize memcpy calls based on the
     44  * size of the types we compare. For now, this code does not include the
     45  * relevant optimization, as clang e.g. inlines a very efficient memcpy()
     46  * implementation. It is not clear, that the specialized version as provided in
     47  * the blog post, is really superior to the one that will be inlined by
     48  * default. So we decided to keep the code simple until this optimization was
     49  * proven to be beneficial.
     50  */
     51 
     52 static void
     53 msort (void *array, void *buf, size_t low, size_t high, size_t size,
     54        int (* compare) (const void *, const void *, void *), void *arg)
     55 {
     56     char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b;
     57     size_t copied = 0;
     58     size_t mid;
     59 
     60     mid = MID (low, high);
     61 
     62     if (mid + 1 < high)
     63         msort (array, buf, mid + 1, high, size, compare, arg);
     64 
     65     if (mid > low)
     66         msort (array, buf, low, mid, size, compare, arg);
     67 
     68     ah = ((char *) array) + ((high + 1) * size);
     69     am = ((char *) array) + ((mid + 1) * size);
     70     a1 = al = ((char *) array) + (low * size);
     71 
     72     b = (char *) buf;
     73     lo = al;
     74     hi = am;
     75 
     76     do {
     77         ls = lo;
     78         hs = hi;
     79 
     80         if (lo > al || hi > am) {
     81             /* our last loop already compared lo & hi and found lo <= hi */
     82             lo += size;
     83         }
     84 
     85         while (lo < am && compare (lo, hi, arg) <= 0)
     86             lo += size;
     87 
     88         if (lo < am) {
     89             if (copied == 0) {
     90                 /* avoid copying the leading items */
     91                 a1 = lo;
     92                 ls = lo;
     93             }
     94 
     95             /* our last compare tells us hi < lo */
     96             hi += size;
     97 
     98             while (hi < ah && compare (hi, lo, arg) < 0)
     99                 hi += size;
    100 
    101             if (lo > ls) {
    102                 memcpy (b, ls, lo - ls);
    103                 copied += (lo - ls);
    104                 b += (lo - ls);
    105             }
    106 
    107             memcpy (b, hs, hi - hs);
    108             copied += (hi - hs);
    109             b += (hi - hs);
    110         } else if (copied) {
    111             memcpy (b, ls, lo - ls);
    112             copied += (lo - ls);
    113             b += (lo - ls);
    114 
    115             /* copy everything we needed to re-order back into array */
    116             memcpy (a1, buf, copied);
    117             return;
    118         } else {
    119             /* everything already in order */
    120             return;
    121         }
    122     } while (hi < ah);
    123 
    124     if (lo < am) {
    125         memcpy (b, lo, am - lo);
    126         copied += (am - lo);
    127     }
    128 
    129     memcpy (a1, buf, copied);
    130 }
    131 
    132 static int
    133 MergeSort (void *base, size_t nmemb, size_t size,
    134            int (* compare) (const void *, const void *, void *), void *arg)
    135 {
    136     void *tmp;
    137 
    138     if (nmemb < 2)
    139         return 0;
    140 
    141     if (!(tmp = malloc (nmemb * size))) {
    142         errno = ENOMEM;
    143         return -1;
    144     }
    145 
    146     msort (base, tmp, 0, nmemb - 1, size, compare, arg);
    147 
    148     free (tmp);
    149 
    150     return 0;
    151 }
    152 
    153 int isl_sort(void *const pbase, size_t total_elems, size_t size,
    154 	int (*cmp)(const void *, const void *, void *arg), void *arg)
    155 {
    156     return MergeSort (pbase, total_elems, size, cmp, arg);
    157 }
    158