str.h revision 1.18 1 /* $NetBSD: str.h,v 1.18 2024/01/05 21:51:27 rillig Exp $ */
2
3 /*
4 Copyright (c) 2021 Roland Illig <rillig (at) NetBSD.org>
5 All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
9 are met:
10
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 2. Redistributions in binary form must reproduce the above copyright
14 notice, this list of conditions and the following disclaimer in the
15 documentation and/or other materials provided with the distribution.
16
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
19 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
21 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 */
29
30
31 /*
32 * Memory-efficient string handling.
33 */
34
35
/*
 * A read-only string, paired with the pointer that FStr_Done passes to
 * free() once the string is no longer needed.
 */
typedef struct FStr {
	const char *str;	/* the string itself, never modified */
	void *freeIt;		/* freed by FStr_Done; NULL if nothing is owned */
} FStr;
41
/*
 * A read-only view of the characters in the half-open range [start, end)
 * of a character array.  The range is NOT null-terminated.
 */
typedef struct Substring {
	const char *start;	/* first character of the range */
	const char *end;	/* one past the last character of the range */
} Substring;
47
/*
 * Builds a string character by character.  Memory is only allocated once
 * the string being built differs from the expected string; see LazyBuf_Add.
 */
typedef struct LazyBuf {
	char *data;		/* allocated copy, or NULL while the content
				 * still equals a prefix of 'expected' */
	size_t len;		/* number of characters added so far */
	size_t cap;		/* allocated size of 'data'; 0 while NULL */
	const char *expected;	/* the string to compare against */
} LazyBuf;
58
/*
 * The result of splitting a string into words, see Str_Words.
 * Release it with Words_Free.
 */
typedef struct Words {
	char **words;		/* the array of words; freed by Words_Free */
	size_t len;		/* NOTE(review): presumably the number of
				 * entries in 'words' -- confirm in Str_Words */
	void *freeIt;		/* additional memory freed by Words_Free */
} Words;
65
66 /* The result of splitting a string into words. */
67 typedef struct SubstringWords {
68 Substring *words;
69 size_t len;
70 void *freeIt;
71 } SubstringWords;
72
/* The result of matching a string against a pattern, see Str_Match. */
typedef struct StrMatchResult {
	const char *error;	/* NOTE(review): presumably NULL unless the
				 * match could not be evaluated -- confirm */
	bool matched;		/* whether the string matched */
} StrMatchResult;
77
78
79 MAKE_INLINE FStr
80 FStr_Init(const char *str, void *freeIt)
81 {
82 FStr fstr;
83 fstr.str = str;
84 fstr.freeIt = freeIt;
85 return fstr;
86 }
87
88 /* Return a string that is the sole owner of str. */
89 MAKE_INLINE FStr
90 FStr_InitOwn(char *str)
91 {
92 return FStr_Init(str, str);
93 }
94
95 /* Return a string that refers to the shared str. */
96 MAKE_INLINE FStr
97 FStr_InitRefer(const char *str)
98 {
99 return FStr_Init(str, NULL);
100 }
101
102 MAKE_INLINE void
103 FStr_Done(FStr *fstr)
104 {
105 free(fstr->freeIt);
106 #ifdef CLEANUP
107 fstr->str = NULL;
108 fstr->freeIt = NULL;
109 #endif
110 }
111
112
113 MAKE_STATIC Substring
114 Substring_Init(const char *start, const char *end)
115 {
116 Substring sub;
117
118 sub.start = start;
119 sub.end = end;
120 return sub;
121 }
122
123 MAKE_INLINE Substring
124 Substring_InitStr(const char *str)
125 {
126 return Substring_Init(str, str + strlen(str));
127 }
128
129 MAKE_STATIC size_t
130 Substring_Length(Substring sub)
131 {
132 return (size_t)(sub.end - sub.start);
133 }
134
135 MAKE_STATIC bool
136 Substring_IsEmpty(Substring sub)
137 {
138 return sub.start == sub.end;
139 }
140
141 MAKE_INLINE bool
142 Substring_Equals(Substring sub, const char *str)
143 {
144 size_t len = strlen(str);
145 return Substring_Length(sub) == len &&
146 memcmp(sub.start, str, len) == 0;
147 }
148
149 MAKE_INLINE bool
150 Substring_Eq(Substring sub, Substring str)
151 {
152 size_t len = Substring_Length(sub);
153 return len == Substring_Length(str) &&
154 memcmp(sub.start, str.start, len) == 0;
155 }
156
157 MAKE_STATIC bool
158 Substring_HasPrefix(Substring sub, Substring prefix)
159 {
160 return Substring_Length(sub) >= Substring_Length(prefix) &&
161 memcmp(sub.start, prefix.start, Substring_Length(prefix)) == 0;
162 }
163
164 MAKE_STATIC bool
165 Substring_HasSuffix(Substring sub, Substring suffix)
166 {
167 size_t suffixLen = Substring_Length(suffix);
168 return Substring_Length(sub) >= suffixLen &&
169 memcmp(sub.end - suffixLen, suffix.start, suffixLen) == 0;
170 }
171
172 /* Returns an independent, null-terminated copy of the substring. */
173 MAKE_STATIC FStr
174 Substring_Str(Substring sub)
175 {
176 if (Substring_IsEmpty(sub))
177 return FStr_InitRefer("");
178 return FStr_InitOwn(bmake_strsedup(sub.start, sub.end));
179 }
180
181 MAKE_STATIC const char *
182 Substring_SkipFirst(Substring sub, char ch)
183 {
184 const char *p;
185
186 for (p = sub.start; p != sub.end; p++)
187 if (*p == ch)
188 return p + 1;
189 return sub.start;
190 }
191
192 MAKE_STATIC const char *
193 Substring_LastIndex(Substring sub, char ch)
194 {
195 const char *p;
196
197 for (p = sub.end; p != sub.start; p--)
198 if (p[-1] == ch)
199 return p - 1;
200 return NULL;
201 }
202
203 MAKE_STATIC Substring
204 Substring_Dirname(Substring pathname)
205 {
206 const char *p;
207
208 for (p = pathname.end; p != pathname.start; p--)
209 if (p[-1] == '/')
210 return Substring_Init(pathname.start, p - 1);
211 return Substring_InitStr(".");
212 }
213
214 MAKE_STATIC Substring
215 Substring_Basename(Substring pathname)
216 {
217 const char *p;
218
219 for (p = pathname.end; p != pathname.start; p--)
220 if (p[-1] == '/')
221 return Substring_Init(p, pathname.end);
222 return pathname;
223 }
224
225
226 MAKE_STATIC void
227 LazyBuf_Init(LazyBuf *buf, const char *expected)
228 {
229 buf->data = NULL;
230 buf->len = 0;
231 buf->cap = 0;
232 buf->expected = expected;
233 }
234
235 MAKE_INLINE void
236 LazyBuf_Done(LazyBuf *buf)
237 {
238 free(buf->data);
239 }
240
241 MAKE_STATIC void
242 LazyBuf_Add(LazyBuf *buf, char ch)
243 {
244
245 if (buf->data != NULL) {
246 if (buf->len == buf->cap) {
247 buf->cap *= 2;
248 buf->data = bmake_realloc(buf->data, buf->cap);
249 }
250 buf->data[buf->len++] = ch;
251
252 } else if (ch == buf->expected[buf->len]) {
253 buf->len++;
254 return;
255
256 } else {
257 buf->cap = buf->len + 16;
258 buf->data = bmake_malloc(buf->cap);
259 memcpy(buf->data, buf->expected, buf->len);
260 buf->data[buf->len++] = ch;
261 }
262 }
263
264 MAKE_STATIC void
265 LazyBuf_AddStr(LazyBuf *buf, const char *str)
266 {
267 const char *p;
268
269 for (p = str; *p != '\0'; p++)
270 LazyBuf_Add(buf, *p);
271 }
272
273 MAKE_INLINE void
274 LazyBuf_AddSubstring(LazyBuf *buf, Substring sub)
275 {
276 const char *p;
277
278 for (p = sub.start; p != sub.end; p++)
279 LazyBuf_Add(buf, *p);
280 }
281
282 MAKE_STATIC Substring
283 LazyBuf_Get(const LazyBuf *buf)
284 {
285 const char *start = buf->data != NULL ? buf->data : buf->expected;
286 return Substring_Init(start, start + buf->len);
287 }
288
289 /*
290 * Returns the content of the buffer as a newly allocated string.
291 *
292 * See LazyBuf_Get to avoid unnecessary memory allocations.
293 */
294 MAKE_STATIC FStr
295 LazyBuf_DoneGet(LazyBuf *buf)
296 {
297 if (buf->data != NULL) {
298 LazyBuf_Add(buf, '\0');
299 return FStr_InitOwn(buf->data);
300 }
301 return Substring_Str(LazyBuf_Get(buf));
302 }
303
304
305 Words Str_Words(const char *, bool);
306
307 MAKE_INLINE void
308 Words_Free(Words w)
309 {
310 free(w.words);
311 free(w.freeIt);
312 }
313
314
315 SubstringWords Substring_Words(const char *, bool);
316
317 MAKE_INLINE void
318 SubstringWords_Init(SubstringWords *w)
319 {
320 w->words = NULL;
321 w->len = 0;
322 w->freeIt = NULL;
323 }
324
325 MAKE_INLINE void
326 SubstringWords_Free(SubstringWords w)
327 {
328 free(w.words);
329 free(w.freeIt);
330 }
331
332
/*
 * Concatenation of two or three strings.
 * NOTE(review): presumably the result is newly allocated and has to be
 * freed by the caller -- confirm against the definitions.
 */
char *str_concat2(const char *, const char *);
char *str_concat3(const char *, const char *, const char *);
335
336 StrMatchResult Str_Match(const char *, const char *);
337
/*
 * String interning.
 * NOTE(review): presumably Str_Intern returns a shared copy of its argument
 * that stays valid between Str_Intern_Init and Str_Intern_End -- confirm
 * against the definitions.
 */
void Str_Intern_Init(void);
void Str_Intern_End(void);
const char *Str_Intern(const char *);
341