pthread_atfork.c revision 1.27 1 1.27 kre /* $NetBSD: pthread_atfork.c,v 1.27 2025/04/09 22:10:59 kre Exp $ */
2 1.1 nathanw
3 1.1 nathanw /*-
4 1.1 nathanw * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 1.1 nathanw * All rights reserved.
6 1.1 nathanw *
7 1.1 nathanw * This code is derived from software contributed to The NetBSD Foundation
8 1.1 nathanw * by Nathan J. Williams.
9 1.1 nathanw *
10 1.1 nathanw * Redistribution and use in source and binary forms, with or without
11 1.1 nathanw * modification, are permitted provided that the following conditions
12 1.1 nathanw * are met:
13 1.1 nathanw * 1. Redistributions of source code must retain the above copyright
14 1.1 nathanw * notice, this list of conditions and the following disclaimer.
15 1.1 nathanw * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 nathanw * notice, this list of conditions and the following disclaimer in the
17 1.1 nathanw * documentation and/or other materials provided with the distribution.
18 1.1 nathanw *
19 1.1 nathanw * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.1 nathanw * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.1 nathanw * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.1 nathanw * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.1 nathanw * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.1 nathanw * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.1 nathanw * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.1 nathanw * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.1 nathanw * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.1 nathanw * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.1 nathanw * POSSIBILITY OF SUCH DAMAGE.
30 1.1 nathanw */
31 1.1 nathanw
32 1.1 nathanw #include <sys/cdefs.h>
33 1.1 nathanw #if defined(LIBC_SCCS) && !defined(lint)
34 1.27 kre __RCSID("$NetBSD: pthread_atfork.c,v 1.27 2025/04/09 22:10:59 kre Exp $");
35 1.1 nathanw #endif /* LIBC_SCCS and not lint */
36 1.1 nathanw
37 1.1 nathanw #include "namespace.h"
38 1.1 nathanw
39 1.1 nathanw #include <errno.h>
40 1.1 nathanw #include <stdlib.h>
41 1.1 nathanw #include <unistd.h>
42 1.27 kre
43 1.27 kre #include <sys/mman.h>
44 1.27 kre #include <sys/param.h>
45 1.25 riastrad #include <sys/queue.h>
46 1.27 kre #include <sys/sysctl.h>
47 1.27 kre
48 1.15 joerg #include "extern.h"
49 1.1 nathanw #include "reentrant.h"
50 1.1 nathanw
51 1.1 nathanw #ifdef __weak_alias
52 1.1 nathanw __weak_alias(pthread_atfork, _pthread_atfork)
53 1.1 nathanw __weak_alias(fork, _fork)
54 1.1 nathanw #endif /* __weak_alias */
55 1.1 nathanw
/*
 * __locked_fork --
 *	Thin wrapper that calls __fork() and hands back its result.
 *	The my_errno argument is accepted but not used by this
 *	implementation.
 */
pid_t
__locked_fork(int *my_errno)
{
	pid_t pid;

	pid = __fork();
	return pid;
}
61 1.1 nathanw
/*
 * One registered fork handler: the queue linkage plus the function
 * that fork() will invoke.
 */
struct atfork_callback {
	SIMPLEQ_ENTRY(atfork_callback) next;	/* link in its handler queue */
	void (*fn)(void);			/* handler to call */
};
66 1.25 riastrad
/*
 * Bookkeeping for one mapped page of callback slots.  Both counters
 * are in units of slots.
 */
struct atfork_cb_header {
	uint16_t entries;	/* number of slots the page holds */
	uint16_t used;		/* number of slots already handed out */
};
71 1.27 kre
72 1.27 kre struct atfork_cb_block {
73 1.27 kre union {
74 1.27 kre struct atfork_callback block;
75 1.27 kre struct atfork_cb_header hdr;
76 1.27 kre } u;
77 1.27 kre };
78 1.27 kre
/*
 * Accessors for a page of callback slots (bp is a
 * struct atfork_cb_block *):
 *
 *	cb_blocks(bp)	address of the first slot, typed as a
 *			struct atfork_callback *, for slot indexing
 *	cb_ents(bp)	header field: slot capacity of the page
 *	cb_used(bp)	header field: slots already handed out
 *
 * cb_ents() and cb_used() expand to parenthesized lvalues, so they can
 * be assigned to as well as read, and compose safely with any
 * surrounding operator.
 */
#define	cb_blocks(bp)	(&(bp)->u.block)
#define	cb_ents(bp)	((bp)->u.hdr.entries)
#define	cb_used(bp)	((bp)->u.hdr.used)
82 1.25 riastrad
/*
 * We need to keep a cache of at least 6, one for prepare, one for parent,
 * one for child x 2 because of the two uses in the libpthread (pthread_init,
 * pthread_tsd_init) constructors, where it is too early to call malloc(3).
 * This does not guarantee that we will have enough, because other libraries
 * can also call pthread_atfork() from their own constructors, so this is not
 * a complete solution and will need to be fixed properly. For now we keep
 * space for 16 since it is just 256 bytes.
 */
/* Statically reserved slots; a slot whose fn is NULL is still free. */
static struct atfork_callback atfork_builtin[16];

/* Page currently being carved into slots; NULL until one is mapped. */
static struct atfork_cb_block *atfork_storage;

/* Cached result of the hw.pagesize query; 0 until it has been made. */
static int hw_pagesize;

/* sysctl MIB used to obtain the page size. */
static const int hw_pagesize_sysctl[2] = { CTL_HW, HW_PAGESIZE };
97 1.22 christos
98 1.1 nathanw /*
99 1.1 nathanw * Hypothetically, we could protect the queues with a rwlock which is
100 1.1 nathanw * write-locked by pthread_atfork() and read-locked by fork(), but
101 1.1 nathanw * since the intended use of the functions is obtaining locks to hold
102 1.1 nathanw * across the fork, forking is going to be serialized anyway.
103 1.1 nathanw */
104 1.10 christos #ifdef _REENTRANT
105 1.1 nathanw static mutex_t atfork_lock = MUTEX_INITIALIZER;
106 1.10 christos #endif
107 1.1 nathanw SIMPLEQ_HEAD(atfork_callback_q, atfork_callback);
108 1.1 nathanw
109 1.2 nathanw static struct atfork_callback_q prepareq = SIMPLEQ_HEAD_INITIALIZER(prepareq);
110 1.2 nathanw static struct atfork_callback_q parentq = SIMPLEQ_HEAD_INITIALIZER(parentq);
111 1.2 nathanw static struct atfork_callback_q childq = SIMPLEQ_HEAD_INITIALIZER(childq);
112 1.1 nathanw
113 1.27 kre /*
114 1.27 kre * Nb: nothing allocated by this allocator is ever freed.
115 1.27 kre * (there is no API to free anything, and no need for one)
116 1.27 kre *
117 1.27 kre * The code relies upon this.
118 1.27 kre */
119 1.7 ad static struct atfork_callback *
120 1.27 kre af_alloc(unsigned int blocks)
121 1.7 ad {
122 1.27 kre struct atfork_callback *result;
123 1.23 christos
124 1.27 kre if (__predict_false(blocks == 0))
125 1.27 kre return NULL;
126 1.27 kre
127 1.27 kre if (__predict_true(atfork_storage == NULL)) {
128 1.27 kre for (size_t i = 0; i < __arraycount(atfork_builtin); i++) {
129 1.27 kre if (atfork_builtin[i].fn == NULL) {
130 1.27 kre if (i + blocks <= __arraycount(atfork_builtin))
131 1.27 kre return &atfork_builtin[i];
132 1.27 kre else
133 1.27 kre break;
134 1.27 kre }
135 1.27 kre }
136 1.22 christos }
137 1.7 ad
138 1.27 kre if (__predict_false(atfork_storage == NULL ||
139 1.27 kre cb_used(atfork_storage) + blocks > cb_ents(atfork_storage))) {
140 1.27 kre if (__predict_false(hw_pagesize == 0)) {
141 1.27 kre size_t len = sizeof(hw_pagesize);
142 1.27 kre
143 1.27 kre if (sysctl(hw_pagesize_sysctl, 2, &hw_pagesize,
144 1.27 kre &len, NULL, 0) != 0)
145 1.27 kre return NULL;
146 1.27 kre if (len != sizeof(hw_pagesize))
147 1.27 kre return NULL;
148 1.27 kre if (hw_pagesize == 0 || (hw_pagesize & 0xFF) != 0)
149 1.27 kre return NULL;
150 1.27 kre }
151 1.27 kre atfork_storage = mmap(0, hw_pagesize, PROT_READ|PROT_WRITE,
152 1.27 kre MAP_PRIVATE | MAP_ANON, -1, 0);
153 1.27 kre if (__predict_false(atfork_storage == NULL))
154 1.27 kre return NULL;
155 1.27 kre cb_used(atfork_storage) = 1;
156 1.27 kre cb_ents(atfork_storage) =
157 1.27 kre (uint16_t)(hw_pagesize / sizeof(struct atfork_cb_block));
158 1.27 kre if (__predict_false(cb_ents(atfork_storage) < blocks + 1))
159 1.27 kre return NULL;
160 1.27 kre }
161 1.7 ad
162 1.27 kre result = cb_blocks(atfork_storage) + cb_used(atfork_storage);
163 1.27 kre cb_used(atfork_storage) += blocks;
164 1.23 christos
165 1.27 kre return result;
166 1.7 ad }
167 1.7 ad
168 1.1 nathanw int
169 1.1 nathanw pthread_atfork(void (*prepare)(void), void (*parent)(void),
170 1.1 nathanw void (*child)(void))
171 1.1 nathanw {
172 1.1 nathanw struct atfork_callback *newprepare, *newparent, *newchild;
173 1.17 riastrad sigset_t mask, omask;
174 1.17 riastrad int error;
175 1.1 nathanw
176 1.17 riastrad sigfillset(&mask);
177 1.17 riastrad thr_sigsetmask(SIG_SETMASK, &mask, &omask);
178 1.17 riastrad
179 1.7 ad mutex_lock(&atfork_lock);
180 1.27 kre
181 1.27 kre /*
182 1.27 kre * Note here that we either get all the blocks
183 1.27 kre * we need, in one call, or we get NULL.
184 1.27 kre *
185 1.27 kre * Note also that a NULL return is not an error
186 1.27 kre * if no blocks were required (all args == NULL)
187 1.27 kre */
188 1.27 kre newprepare = af_alloc((prepare != NULL) +
189 1.27 kre (parent != NULL) + (child != NULL));
190 1.27 kre
191 1.27 kre error = ENOMEM; /* in case of "goto out" */
192 1.27 kre
193 1.27 kre newparent = newprepare;
194 1.1 nathanw if (prepare != NULL) {
195 1.27 kre if (__predict_false(newprepare == NULL))
196 1.17 riastrad goto out;
197 1.25 riastrad newprepare->fn = prepare;
198 1.27 kre newparent++;
199 1.1 nathanw }
200 1.1 nathanw
201 1.27 kre newchild = newparent;
202 1.1 nathanw if (parent != NULL) {
203 1.27 kre if (__predict_false(newparent == NULL))
204 1.17 riastrad goto out;
205 1.25 riastrad newparent->fn = parent;
206 1.27 kre newchild++;
207 1.1 nathanw }
208 1.1 nathanw
209 1.1 nathanw if (child != NULL) {
210 1.27 kre if (__predict_false(newchild == NULL))
211 1.17 riastrad goto out;
212 1.25 riastrad newchild->fn = child;
213 1.1 nathanw }
214 1.1 nathanw
215 1.25 riastrad /*
216 1.25 riastrad * The order in which the functions are called is specified as
217 1.25 riastrad * LIFO for the prepare handler and FIFO for the others; insert
218 1.25 riastrad * at the head and tail as appropriate so that SIMPLEQ_FOREACH()
219 1.25 riastrad * produces the right order.
220 1.25 riastrad */
221 1.25 riastrad if (prepare)
222 1.25 riastrad SIMPLEQ_INSERT_HEAD(&prepareq, newprepare, next);
223 1.25 riastrad if (parent)
224 1.25 riastrad SIMPLEQ_INSERT_TAIL(&parentq, newparent, next);
225 1.25 riastrad if (child)
226 1.25 riastrad SIMPLEQ_INSERT_TAIL(&childq, newchild, next);
227 1.27 kre
228 1.17 riastrad error = 0;
229 1.1 nathanw
230 1.27 kre out:;
231 1.27 kre mutex_unlock(&atfork_lock);
232 1.17 riastrad thr_sigsetmask(SIG_SETMASK, &omask, NULL);
233 1.17 riastrad return error;
234 1.1 nathanw }
235 1.1 nathanw
236 1.3 lukem pid_t
237 1.3 lukem fork(void)
238 1.1 nathanw {
239 1.1 nathanw struct atfork_callback *iter;
240 1.1 nathanw pid_t ret;
241 1.1 nathanw
242 1.1 nathanw mutex_lock(&atfork_lock);
243 1.1 nathanw SIMPLEQ_FOREACH(iter, &prepareq, next)
244 1.6 yamt (*iter->fn)();
245 1.15 joerg _malloc_prefork();
246 1.1 nathanw
247 1.14 joerg ret = __locked_fork(&errno);
248 1.1 nathanw
249 1.1 nathanw if (ret != 0) {
250 1.1 nathanw /*
251 1.1 nathanw * We are the parent. It doesn't matter here whether
252 1.1 nathanw * the fork call succeeded or failed.
253 1.1 nathanw */
254 1.15 joerg _malloc_postfork();
255 1.1 nathanw SIMPLEQ_FOREACH(iter, &parentq, next)
256 1.6 yamt (*iter->fn)();
257 1.1 nathanw mutex_unlock(&atfork_lock);
258 1.1 nathanw } else {
259 1.1 nathanw /* We are the child */
260 1.15 joerg _malloc_postfork_child();
261 1.1 nathanw SIMPLEQ_FOREACH(iter, &childq, next)
262 1.6 yamt (*iter->fn)();
263 1.1 nathanw /*
264 1.1 nathanw * Note: We are explicitly *not* unlocking
265 1.1 nathanw * atfork_lock. Unlocking atfork_lock is problematic,
266 1.1 nathanw * because if any threads in the parent blocked on it
267 1.1 nathanw * between the initial lock and the fork() syscall,
268 1.1 nathanw * unlocking in the child will try to schedule
269 1.1 nathanw * threads, and either the internal mutex interlock or
270 1.1 nathanw * the runqueue spinlock could have been held at the
271 1.1 nathanw * moment of fork(). Since the other threads do not
272 1.1 nathanw * exist in this process, the spinlock will never be
273 1.1 nathanw * unlocked, and we would wedge.
274 1.1 nathanw * Instead, we reinitialize atfork_lock, since we know
275 1.1 nathanw * that the state of the atfork lists is consistent here,
276 1.1 nathanw * and that there are no other threads to be affected by
277 1.1 nathanw * the forcible cleaning of the queue.
278 1.1 nathanw * This permits double-forking to work, although
279 1.1 nathanw * it requires knowing that it's "safe" to initialize
280 1.1 nathanw * a locked mutex in this context.
281 1.1 nathanw *
282 1.1 nathanw * The problem exists for users of this interface,
283 1.16 andvar * too, since the intended use of pthread_atfork() is
284 1.1 nathanw * to acquire locks across the fork call to ensure
285 1.1 nathanw * that the child sees consistent state. There's not
286 1.1 nathanw * much that can usefully be done in a child handler,
287 1.1 nathanw * and conventional wisdom discourages using them, but
288 1.1 nathanw * they're part of the interface, so here we are...
289 1.1 nathanw */
290 1.1 nathanw mutex_init(&atfork_lock, NULL);
291 1.1 nathanw }
292 1.1 nathanw
293 1.1 nathanw return ret;
294 1.1 nathanw }
295