1 1.1 mrg /* Compare strings while treating digits characters numerically. 2 1.1.1.9 mrg Copyright (C) 1997-2024 Free Software Foundation, Inc. 3 1.1 mrg This file is part of the libiberty library. 4 1.1.1.8 mrg Contributed by Jean-Franois Bignolles <bignolle (at) ecoledoc.ibp.fr>, 1997. 5 1.1 mrg 6 1.1 mrg Libiberty is free software; you can redistribute it and/or 7 1.1 mrg modify it under the terms of the GNU Lesser General Public 8 1.1 mrg License as published by the Free Software Foundation; either 9 1.1 mrg version 2.1 of the License, or (at your option) any later version. 10 1.1 mrg 11 1.1 mrg Libiberty is distributed in the hope that it will be useful, 12 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 13 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 1.1 mrg Lesser General Public License for more details. 15 1.1 mrg 16 1.1 mrg You should have received a copy of the GNU Lesser General Public 17 1.1 mrg License along with the GNU C Library; if not, write to the Free 18 1.1 mrg Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 19 1.1 mrg 02110-1301 USA. */ 20 1.1 mrg 21 1.1 mrg #include "libiberty.h" 22 1.1 mrg #include "safe-ctype.h" 23 1.1 mrg 24 1.1 mrg /* 25 1.1 mrg @deftypefun int strverscmp (const char *@var{s1}, const char *@var{s2}) 26 1.1 mrg The @code{strverscmp} function compares the string @var{s1} against 27 1.1 mrg @var{s2}, considering them as holding indices/version numbers. Return 28 1.1 mrg value follows the same conventions as found in the @code{strverscmp} 29 1.1 mrg function. In fact, if @var{s1} and @var{s2} contain no digits, 30 1.1 mrg @code{strverscmp} behaves like @code{strcmp}. 31 1.1 mrg 32 1.1 mrg Basically, we compare strings normally (character by character), until 33 1.1 mrg we find a digit in each string - then we enter a special comparison 34 1.1 mrg mode, where each sequence of digits is taken as a whole. If we reach the 35 1.1 mrg end of these two parts without noticing a difference, we return to the 36 1.1 mrg standard comparison mode. There are two types of numeric parts: 37 1.1 mrg "integral" and "fractional" (those begin with a '0'). The types 38 1.1 mrg of the numeric parts affect the way we sort them: 39 1.1 mrg 40 1.1 mrg @itemize @bullet 41 1.1 mrg @item 42 1.1 mrg integral/integral: we compare values as you would expect. 43 1.1 mrg 44 1.1 mrg @item 45 1.1 mrg fractional/integral: the fractional part is less than the integral one. 46 1.1 mrg Again, no surprise. 47 1.1 mrg 48 1.1 mrg @item 49 1.1 mrg fractional/fractional: the things become a bit more complex. 50 1.1 mrg If the common prefix contains only leading zeroes, the longest part is less 51 1.1 mrg than the other one; else the comparison behaves normally. 52 1.1 mrg @end itemize 53 1.1 mrg 54 1.1 mrg @smallexample 55 1.1 mrg strverscmp ("no digit", "no digit") 56 1.1 mrg @result{} 0 // @r{same behavior as strcmp.} 57 1.1 mrg strverscmp ("item#99", "item#100") 58 1.1 mrg @result{} <0 // @r{same prefix, but 99 < 100.} 59 1.1 mrg strverscmp ("alpha1", "alpha001") 60 1.1 mrg @result{} >0 // @r{fractional part inferior to integral one.} 61 1.1 mrg strverscmp ("part1_f012", "part1_f01") 62 1.1 mrg @result{} >0 // @r{two fractional parts.} 63 1.1 mrg strverscmp ("foo.009", "foo.0") 64 1.1 mrg @result{} <0 // @r{idem, but with leading zeroes only.} 65 1.1 mrg @end smallexample 66 1.1 mrg 67 1.1 mrg This function is especially useful when dealing with filename sorting, 68 1.1 mrg because filenames frequently hold indices/version numbers. 69 1.1 mrg @end deftypefun 70 1.1 mrg 71 1.1 mrg */ 72 1.1 mrg 73 1.1 mrg /* states: S_N: normal, S_I: comparing integral part, S_F: comparing 74 1.1 mrg fractional parts, S_Z: idem but with leading Zeroes only */ 75 1.1 mrg #define S_N 0x0 76 1.1 mrg #define S_I 0x4 77 1.1 mrg #define S_F 0x8 78 1.1 mrg #define S_Z 0xC 79 1.1 mrg 80 1.1 mrg /* result_type: CMP: return diff; LEN: compare using len_diff/diff */ 81 1.1 mrg #define CMP 2 82 1.1 mrg #define LEN 3 83 1.1 mrg 84 1.1 mrg 85 1.1 mrg /* Compare S1 and S2 as strings holding indices/version numbers, 86 1.1 mrg returning less than, equal to or greater than zero if S1 is less than, 87 1.1 mrg equal to or greater than S2 (for more info, see the Glibc texinfo doc). */ 88 1.1 mrg 89 1.1 mrg int 90 1.1 mrg strverscmp (const char *s1, const char *s2) 91 1.1 mrg { 92 1.1 mrg const unsigned char *p1 = (const unsigned char *) s1; 93 1.1 mrg const unsigned char *p2 = (const unsigned char *) s2; 94 1.1 mrg unsigned char c1, c2; 95 1.1 mrg int state; 96 1.1 mrg int diff; 97 1.1 mrg 98 1.1 mrg /* Symbol(s) 0 [1-9] others (padding) 99 1.1 mrg Transition (10) 0 (01) d (00) x (11) - */ 100 1.1 mrg static const unsigned int next_state[] = 101 1.1 mrg { 102 1.1 mrg /* state x d 0 - */ 103 1.1 mrg /* S_N */ S_N, S_I, S_Z, S_N, 104 1.1 mrg /* S_I */ S_N, S_I, S_I, S_I, 105 1.1 mrg /* S_F */ S_N, S_F, S_F, S_F, 106 1.1 mrg /* S_Z */ S_N, S_F, S_Z, S_Z 107 1.1 mrg }; 108 1.1 mrg 109 1.1 mrg static const int result_type[] = 110 1.1 mrg { 111 1.1 mrg /* state x/x x/d x/0 x/- d/x d/d d/0 d/- 112 1.1 mrg 0/x 0/d 0/0 0/- -/x -/d -/0 -/- */ 113 1.1 mrg 114 1.1 mrg /* S_N */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 115 1.1 mrg CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 116 1.1 mrg /* S_I */ CMP, -1, -1, CMP, +1, LEN, LEN, CMP, 117 1.1 mrg +1, LEN, LEN, CMP, CMP, CMP, CMP, CMP, 118 1.1 mrg /* S_F */ CMP, CMP, CMP, CMP, CMP, LEN, CMP, CMP, 119 1.1 mrg CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, 120 1.1 mrg /* S_Z */ CMP, +1, +1, CMP, -1, CMP, CMP, CMP, 121 1.1 mrg -1, CMP, CMP, CMP 122 1.1 mrg }; 123 1.1 mrg 124 1.1 mrg if (p1 == p2) 125 1.1 mrg return 0; 126 1.1 mrg 127 1.1 mrg c1 = *p1++; 128 1.1 mrg c2 = *p2++; 129 1.1 mrg /* Hint: '0' is a digit too. */ 130 1.1 mrg state = S_N | ((c1 == '0') + (ISDIGIT (c1) != 0)); 131 1.1 mrg 132 1.1 mrg while ((diff = c1 - c2) == 0 && c1 != '\0') 133 1.1 mrg { 134 1.1 mrg state = next_state[state]; 135 1.1 mrg c1 = *p1++; 136 1.1 mrg c2 = *p2++; 137 1.1 mrg state |= (c1 == '0') + (ISDIGIT (c1) != 0); 138 1.1 mrg } 139 1.1 mrg 140 1.1 mrg state = result_type[state << 2 | (((c2 == '0') + (ISDIGIT (c2) != 0)))]; 141 1.1 mrg 142 1.1 mrg switch (state) 143 1.1 mrg { 144 1.1 mrg case CMP: 145 1.1 mrg return diff; 146 1.1 mrg 147 1.1 mrg case LEN: 148 1.1 mrg while (ISDIGIT (*p1++)) 149 1.1 mrg if (!ISDIGIT (*p2++)) 150 1.1 mrg return 1; 151 1.1 mrg 152 1.1 mrg return ISDIGIT (*p2) ? -1 : diff; 153 1.1 mrg 154 1.1 mrg default: 155 1.1 mrg return state; 156 1.1 mrg } 157 1.1 mrg } 158