statd.c revision 1.2 1 1.2 christos /* $NetBSD: statd.c,v 1.2 1997/05/17 15:52:52 christos Exp $ */
2 1.1 scottr
3 1.1 scottr /*
4 1.1 scottr * Copyright (c) 1995
5 1.1 scottr * A.R. Gordon (andrew.gordon (at) net-tel.co.uk). All rights reserved.
6 1.1 scottr *
7 1.1 scottr * Redistribution and use in source and binary forms, with or without
8 1.1 scottr * modification, are permitted provided that the following conditions
9 1.1 scottr * are met:
10 1.1 scottr * 1. Redistributions of source code must retain the above copyright
11 1.1 scottr * notice, this list of conditions and the following disclaimer.
12 1.1 scottr * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 scottr * notice, this list of conditions and the following disclaimer in the
14 1.1 scottr * documentation and/or other materials provided with the distribution.
15 1.1 scottr * 3. All advertising materials mentioning features or use of this software
16 1.1 scottr * must display the following acknowledgement:
17 1.1 scottr * This product includes software developed for the FreeBSD project
18 1.1 scottr * 4. Neither the name of the author nor the names of any co-contributors
19 1.1 scottr * may be used to endorse or promote products derived from this software
20 1.1 scottr * without specific prior written permission.
21 1.1 scottr *
22 1.1 scottr * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND
23 1.1 scottr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.1 scottr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.1 scottr * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 1.1 scottr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.1 scottr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.1 scottr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.1 scottr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.1 scottr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.1 scottr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.1 scottr * SUCH DAMAGE.
33 1.1 scottr *
34 1.1 scottr */
35 1.1 scottr
36 1.1 scottr
37 1.1 scottr /* main() function for status monitor daemon. Some of the code in this */
38 1.1 scottr /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */
39 1.1 scottr /* The actual program logic is in the file procs.c */
40 1.1 scottr
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>

#include <rpc/rpc.h>

#include "statd.h"
55 1.1 scottr
56 1.1 scottr #ifndef lint
57 1.2 christos static char rcsid[] = "$NetBSD: statd.c,v 1.2 1997/05/17 15:52:52 christos Exp $";
58 1.1 scottr #endif /* not lint */
59 1.1 scottr
60 1.1 scottr int debug = 0; /* Controls syslog() for debug msgs */
61 1.1 scottr int _rpcsvcdirty = 0; /* XXX ??? */
62 1.1 scottr FileLayout *status_info; /* Pointer to mmap()ed status file */
63 1.1 scottr static int status_fd; /* File descriptor for the open file */
64 1.1 scottr static off_t status_file_len; /* Current on-disc length of file */
65 1.1 scottr
66 1.2 christos extern void sm_prog_1 __P((struct svc_req *, SVCXPRT *));
67 1.2 christos static void handle_sigchld __P((int));
68 1.1 scottr
69 1.1 scottr main(argc, argv)
70 1.1 scottr int argc;
71 1.1 scottr char **argv;
72 1.1 scottr {
73 1.1 scottr SVCXPRT *transp;
74 1.1 scottr struct sigaction sa;
75 1.1 scottr int ch;
76 1.1 scottr
77 1.1 scottr while ((ch = getopt(argc, argv, "d")) != (-1)) {
78 1.1 scottr switch (ch) {
79 1.1 scottr case 'd':
80 1.1 scottr debug = 1;
81 1.1 scottr break;
82 1.1 scottr default:
83 1.1 scottr case '?':
84 1.1 scottr errx(1, "usage: rpc.statd [-d]");
85 1.1 scottr /* NOTREACHED */
86 1.1 scottr }
87 1.1 scottr }
88 1.1 scottr (void)pmap_unset(SM_PROG, SM_VERS);
89 1.1 scottr
90 1.1 scottr transp = svcudp_create(RPC_ANYSOCK);
91 1.1 scottr if (transp == NULL) {
92 1.1 scottr errx(1, "cannot create udp service.");
93 1.1 scottr /* NOTREACHED */
94 1.1 scottr }
95 1.1 scottr if (!svc_register(transp, SM_PROG, SM_VERS, sm_prog_1, IPPROTO_UDP)) {
96 1.1 scottr errx(1, "unable to register (SM_PROG, SM_VERS, udp).");
97 1.1 scottr /* NOTREACHED */
98 1.1 scottr }
99 1.1 scottr transp = svctcp_create(RPC_ANYSOCK, 0, 0);
100 1.1 scottr if (transp == NULL) {
101 1.1 scottr errx(1, "cannot create tcp service.");
102 1.1 scottr /* NOTREACHED */
103 1.1 scottr }
104 1.1 scottr if (!svc_register(transp, SM_PROG, SM_VERS, sm_prog_1, IPPROTO_TCP)) {
105 1.1 scottr errx(1, "unable to register (SM_PROG, SM_VERS, tcp).");
106 1.1 scottr /* NOTREACHED */
107 1.1 scottr }
108 1.1 scottr init_file("/var/db/statd.status");
109 1.1 scottr
110 1.1 scottr /*
111 1.1 scottr * Note that it is NOT sensible to run this program from inetd - the
112 1.1 scottr * protocol assumes that it will run immediately at boot time.
113 1.1 scottr */
114 1.1 scottr daemon(0, 0);
115 1.1 scottr openlog("rpc.statd", 0, LOG_DAEMON);
116 1.1 scottr if (debug)
117 1.1 scottr syslog(LOG_INFO, "Starting - debug enabled");
118 1.1 scottr else
119 1.1 scottr syslog(LOG_INFO, "Starting");
120 1.1 scottr
121 1.1 scottr /* Install signal handler to collect exit status of child processes */
122 1.1 scottr sa.sa_handler = handle_sigchld;
123 1.1 scottr sigemptyset(&sa.sa_mask);
124 1.1 scottr sigaddset(&sa.sa_mask, SIGCHLD);
125 1.1 scottr sa.sa_flags = SA_RESTART;
126 1.1 scottr sigaction(SIGCHLD, &sa, NULL);
127 1.1 scottr
128 1.1 scottr /* Initialisation now complete - start operating */
129 1.1 scottr
130 1.1 scottr /*
131 1.1 scottr * notify_hosts() forks a process (if necessary) to do the
132 1.1 scottr * SM_NOTIFY calls, which may be slow.
133 1.1 scottr */
134 1.1 scottr notify_hosts();
135 1.1 scottr
136 1.1 scottr svc_run(); /* Should never return */
137 1.1 scottr exit(1);
138 1.1 scottr }
139 1.1 scottr
140 1.1 scottr /* handle_sigchld ---------------------------------------------------------- */
141 1.1 scottr /*
142 1.1 scottr * Purpose: Catch SIGCHLD and collect process status
143 1.1 scottr * Returns: Nothing.
144 1.1 scottr * Notes: No special action required, other than to collect the
145 1.1 scottr * process status and hence allow the child to die:
146 1.1 scottr * we only use child processes for asynchronous transmission
147 1.1 scottr * of SM_NOTIFY to other systems, so it is normal for the
148 1.1 scottr * children to exit when they have done their work.
149 1.1 scottr */
150 1.1 scottr static void
151 1.2 christos handle_sigchld(sig)
152 1.2 christos int sig;
153 1.1 scottr {
154 1.1 scottr int pid, status;
155 1.1 scottr pid = wait4(-1, &status, WNOHANG, (struct rusage *) 0);
156 1.1 scottr if (!pid)
157 1.1 scottr syslog(LOG_ERR, "Phantom SIGCHLD??");
158 1.1 scottr else if (status)
159 1.1 scottr syslog(LOG_ERR, "Child %d failed with status %d", pid,
160 1.1 scottr WEXITSTATUS(status));
161 1.1 scottr else if (debug)
162 1.1 scottr syslog(LOG_DEBUG, "Child %d exited OK", pid);
163 1.1 scottr }
164 1.1 scottr
165 1.1 scottr /* sync_file --------------------------------------------------------------- */
166 1.1 scottr /*
167 1.1 scottr * Purpose: Packaged call of msync() to flush changes to mmap()ed file
168 1.1 scottr * Returns: Nothing. Errors to syslog.
169 1.1 scottr */
170 1.1 scottr void
171 1.1 scottr sync_file()
172 1.1 scottr {
173 1.1 scottr if (msync((void *)status_info, 0) < 0)
174 1.1 scottr syslog(LOG_ERR, "msync() failed: %s", strerror(errno));
175 1.1 scottr }
176 1.1 scottr
177 1.1 scottr /* find_host -------------------------------------------------------------- */
178 1.1 scottr /*
179 1.1 scottr * Purpose: Find the entry in the status file for a given host
180 1.1 scottr * Returns: Pointer to that entry in the mmap() region, or NULL.
181 1.1 scottr * Notes: Also creates entries if requested.
182 1.1 scottr * Failure to create also returns NULL.
183 1.1 scottr */
184 1.1 scottr HostInfo *
185 1.1 scottr find_host(hostname, create)
186 1.1 scottr char *hostname;
187 1.1 scottr int create;
188 1.1 scottr {
189 1.1 scottr HostInfo *hp;
190 1.1 scottr HostInfo *spare_slot = NULL;
191 1.1 scottr HostInfo *result = NULL;
192 1.1 scottr int i;
193 1.1 scottr
194 1.1 scottr for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts;
195 1.1 scottr i++, hp++) {
196 1.1 scottr if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN)) {
197 1.1 scottr result = hp;
198 1.1 scottr break;
199 1.1 scottr }
200 1.1 scottr if (!spare_slot && !hp->monList && !hp->notifyReqd)
201 1.1 scottr spare_slot = hp;
202 1.1 scottr }
203 1.1 scottr
204 1.1 scottr /* Return if entry found, or if not asked to create one. */
205 1.1 scottr if (result || !create)
206 1.1 scottr return (result);
207 1.1 scottr
208 1.1 scottr /*
209 1.1 scottr * Now create an entry, using the spare slot if one was found or
210 1.1 scottr * adding to the end of the list otherwise, extending file if req'd
211 1.1 scottr */
212 1.1 scottr if (!spare_slot) {
213 1.1 scottr off_t desired_size;
214 1.1 scottr spare_slot = &status_info->hosts[status_info->noOfHosts];
215 1.1 scottr desired_size = ((char *)spare_slot - (char *)status_info) +
216 1.1 scottr sizeof(HostInfo);
217 1.1 scottr
218 1.1 scottr if (desired_size > status_file_len) {
219 1.1 scottr /* Extend file by writing 1 byte of junk at the
220 1.1 scottr * desired end pos */
221 1.1 scottr lseek(status_fd, desired_size - 1, SEEK_SET);
222 1.1 scottr i = write(status_fd, &i, 1);
223 1.1 scottr if (i < 1) {
224 1.1 scottr syslog(LOG_ERR, "Unable to extend status file");
225 1.1 scottr return (NULL);
226 1.1 scottr }
227 1.1 scottr status_file_len = desired_size;
228 1.1 scottr }
229 1.1 scottr status_info->noOfHosts++;
230 1.1 scottr }
231 1.1 scottr /*
232 1.1 scottr * Initialise the spare slot that has been found/created
233 1.1 scottr * Note that we do not msync(), since the caller is presumed to be
234 1.1 scottr * about to modify the entry further
235 1.1 scottr */
236 1.1 scottr memset(spare_slot, 0, sizeof(HostInfo));
237 1.1 scottr strncpy(spare_slot->hostname, hostname, SM_MAXSTRLEN);
238 1.1 scottr return (spare_slot);
239 1.1 scottr }
240 1.1 scottr
241 1.1 scottr /* init_file -------------------------------------------------------------- */
242 1.1 scottr /*
243 1.1 scottr * Purpose: Open file, create if necessary, initialise it.
244 1.1 scottr * Returns: Nothing - exits on error
245 1.1 scottr * Notes: Called before process becomes daemon, hence logs to
246 1.1 scottr * stderr rather than syslog.
247 1.1 scottr * Opens the file, then mmap()s it for ease of access.
248 1.1 scottr * Also performs initial clean-up of the file, zeroing
249 1.1 scottr * monitor list pointers, setting the notifyReqd flag in
250 1.1 scottr * all hosts that had a monitor list, and incrementing
251 1.1 scottr * the state number to the next odd value.
252 1.1 scottr */
253 1.1 scottr void
254 1.1 scottr init_file(filename)
255 1.1 scottr char *filename;
256 1.1 scottr {
257 1.1 scottr char buf[HEADER_LEN];
258 1.1 scottr int new_file = FALSE;
259 1.1 scottr int i;
260 1.1 scottr
261 1.1 scottr /* try to open existing file - if not present, create one */
262 1.1 scottr status_fd = open(filename, O_RDWR);
263 1.1 scottr if ((status_fd < 0) && (errno == ENOENT)) {
264 1.1 scottr status_fd = open(filename, O_RDWR | O_CREAT, 0644);
265 1.1 scottr new_file = TRUE;
266 1.1 scottr }
267 1.1 scottr if (status_fd < 0) {
268 1.1 scottr err(1, "unable to open status file %s", filename);
269 1.1 scottr /* NOTREACHED */
270 1.1 scottr }
271 1.1 scottr
272 1.1 scottr /*
273 1.1 scottr * File now open. mmap() it, with a generous size to allow for
274 1.1 scottr * later growth, where we will extend the file but not re-map it.
275 1.1 scottr */
276 1.1 scottr status_info = (FileLayout *)mmap(NULL, 0x10000000,
277 1.1 scottr PROT_READ | PROT_WRITE, MAP_SHARED, status_fd, 0);
278 1.1 scottr
279 1.1 scottr if (status_info == (FileLayout *)(-1)) {
280 1.1 scottr perror("rpc.statd");
281 1.1 scottr fprintf(stderr, "Unable to mmap() status file\n");
282 1.1 scottr }
283 1.1 scottr status_file_len = lseek(status_fd, 0L, SEEK_END);
284 1.1 scottr
285 1.1 scottr /*
286 1.1 scottr * If the file was not newly created, validate the contents, and if
287 1.1 scottr * defective, re-create from scratch.
288 1.1 scottr */
289 1.1 scottr if (!new_file) {
290 1.1 scottr if ((status_file_len < HEADER_LEN) || (status_file_len <
291 1.1 scottr (HEADER_LEN + sizeof(HostInfo) * status_info->noOfHosts))) {
292 1.1 scottr fprintf(stderr, "rpc.statd: status file is corrupt\n");
293 1.1 scottr new_file = TRUE;
294 1.1 scottr }
295 1.1 scottr }
296 1.1 scottr /* Initialisation of a new, empty file. */
297 1.1 scottr if (new_file) {
298 1.1 scottr memset(buf, 0, sizeof(buf));
299 1.1 scottr lseek(status_fd, 0L, SEEK_SET);
300 1.1 scottr write(status_fd, buf, HEADER_LEN);
301 1.1 scottr status_file_len = HEADER_LEN;
302 1.1 scottr } else {
303 1.1 scottr /*
304 1.1 scottr * Clean-up of existing file - monitored hosts will have a
305 1.1 scottr * pointer to a list of clients, which refers to memory in
306 1.1 scottr * the previous incarnation of the program and so are
307 1.1 scottr * meaningless now. These pointers are zeroed and the fact
308 1.1 scottr * that the host was previously monitored is recorded by
309 1.1 scottr * setting the notifyReqd flag, which will in due course
310 1.1 scottr * cause a SM_NOTIFY to be sent.
311 1.1 scottr *
312 1.1 scottr * Note that if we crash twice in quick succession, some hosts
313 1.1 scottr * may already have notifyReqd set, where we didn't manage to
314 1.1 scottr * notify them before the second crash occurred.
315 1.1 scottr */
316 1.1 scottr for (i = 0; i < status_info->noOfHosts; i++) {
317 1.1 scottr HostInfo *this_host = &status_info->hosts[i];
318 1.1 scottr
319 1.1 scottr if (this_host->monList) {
320 1.1 scottr this_host->notifyReqd = TRUE;
321 1.1 scottr this_host->monList = NULL;
322 1.1 scottr }
323 1.1 scottr }
324 1.1 scottr /* Select the next higher even number for the state counter */
325 1.1 scottr status_info->ourState =
326 1.1 scottr (status_info->ourState + 2) & 0xfffffffe;
327 1.1 scottr status_info->ourState++; /* XXX - ??? */
328 1.1 scottr }
329 1.1 scottr }
330 1.1 scottr
331 1.1 scottr /* notify_one_host --------------------------------------------------------- */
332 1.1 scottr /*
333 1.1 scottr * Purpose: Perform SM_NOTIFY procedure at specified host
334 1.1 scottr * Returns: TRUE if success, FALSE if failed.
335 1.1 scottr */
336 1.1 scottr static int
337 1.1 scottr notify_one_host(hostname)
338 1.1 scottr char *hostname;
339 1.1 scottr {
340 1.1 scottr struct timeval timeout = {20, 0}; /* 20 secs timeout */
341 1.1 scottr CLIENT *cli;
342 1.1 scottr char dummy;
343 1.1 scottr stat_chge arg;
344 1.1 scottr char our_hostname[SM_MAXSTRLEN + 1];
345 1.1 scottr
346 1.1 scottr gethostname(our_hostname, sizeof(our_hostname));
347 1.1 scottr our_hostname[SM_MAXSTRLEN] = '\0';
348 1.1 scottr arg.mon_name = our_hostname;
349 1.1 scottr arg.state = status_info->ourState;
350 1.1 scottr
351 1.1 scottr if (debug)
352 1.1 scottr syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s",
353 1.1 scottr hostname, our_hostname);
354 1.1 scottr
355 1.1 scottr cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp");
356 1.1 scottr if (!cli) {
357 1.1 scottr syslog(LOG_ERR, "Failed to contact host %s%s", hostname,
358 1.1 scottr clnt_spcreateerror(""));
359 1.1 scottr return (FALSE);
360 1.1 scottr }
361 1.1 scottr if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void,
362 1.1 scottr &dummy, timeout) != RPC_SUCCESS) {
363 1.1 scottr syslog(LOG_ERR, "Failed to contact rpc.statd at host %s",
364 1.1 scottr hostname);
365 1.1 scottr clnt_destroy(cli);
366 1.1 scottr return (FALSE);
367 1.1 scottr }
368 1.1 scottr clnt_destroy(cli);
369 1.1 scottr return (TRUE);
370 1.1 scottr }
371 1.1 scottr
372 1.1 scottr /* notify_hosts ------------------------------------------------------------ */
373 1.1 scottr /*
374 1.1 scottr * Purpose: Send SM_NOTIFY to all hosts marked as requiring it
375 1.1 scottr * Returns: Nothing, immediately - forks a process to do the work.
376 1.1 scottr * Notes: Does nothing if there are no monitored hosts.
377 1.1 scottr * Called after all the initialisation has been done -
378 1.1 scottr * logs to syslog.
379 1.1 scottr */
380 1.1 scottr void
381 1.1 scottr notify_hosts(void)
382 1.1 scottr {
383 1.1 scottr HostInfo *hp;
384 1.1 scottr int i, attempts;
385 1.1 scottr int work_to_do = FALSE;
386 1.1 scottr pid_t pid;
387 1.1 scottr
388 1.1 scottr /* First check if there is in fact any work to do. */
389 1.1 scottr for (i = status_info->noOfHosts, hp = status_info->hosts; i;
390 1.1 scottr i--, hp++) {
391 1.1 scottr if (hp->notifyReqd) {
392 1.1 scottr work_to_do = TRUE;
393 1.1 scottr break;
394 1.1 scottr }
395 1.1 scottr }
396 1.1 scottr
397 1.1 scottr if (!work_to_do)
398 1.1 scottr return; /* No work found */
399 1.1 scottr
400 1.1 scottr pid = fork();
401 1.1 scottr if (pid == -1) {
402 1.1 scottr syslog(LOG_ERR, "Unable to fork notify process - %s",
403 1.1 scottr strerror(errno));
404 1.1 scottr return;
405 1.1 scottr }
406 1.1 scottr if (pid)
407 1.1 scottr return;
408 1.1 scottr
409 1.1 scottr /*
410 1.1 scottr * Here in the child process. We continue until all the hosts marked
411 1.1 scottr * as requiring notification have been duly notified.
412 1.1 scottr * If one of the initial attempts fails, we sleep for a while and
413 1.1 scottr * have another go. This is necessary because when we have crashed,
414 1.1 scottr * (eg. a power outage) it is quite possible that we won't be able to
415 1.1 scottr * contact all monitored hosts immediately on restart, either because
416 1.1 scottr * they crashed too and take longer to come up (in which case the
417 1.1 scottr * notification isn't really required), or more importantly if some
418 1.1 scottr * router etc. needed to reach the monitored host has not come back
419 1.1 scottr * up yet. In this case, we will be a bit late in re-establishing
420 1.1 scottr * locks (after the grace period) but that is the best we can do.
421 1.1 scottr * We try 10 times at 5 sec intervals, 10 more times at 1 minute
422 1.1 scottr * intervals, then 24 more times at hourly intervals, finally
423 1.1 scottr * giving up altogether if the host hasn't come back to life after
424 1.1 scottr * 24 hours.
425 1.1 scottr */
426 1.1 scottr for (attempts = 0; attempts < 44; attempts++) {
427 1.1 scottr work_to_do = FALSE; /* Unless anything fails */
428 1.1 scottr for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
429 1.1 scottr i--, hp++) {
430 1.1 scottr if (hp->notifyReqd) {
431 1.1 scottr if (notify_one_host(hp->hostname)) {
432 1.1 scottr hp->notifyReqd = FALSE;
433 1.1 scottr sync_file();
434 1.1 scottr } else
435 1.1 scottr work_to_do = TRUE;
436 1.1 scottr }
437 1.1 scottr }
438 1.1 scottr if (!work_to_do)
439 1.1 scottr break;
440 1.1 scottr if (attempts < 10)
441 1.1 scottr sleep(5);
442 1.1 scottr else
443 1.1 scottr if (attempts < 20)
444 1.1 scottr sleep(60);
445 1.1 scottr else
446 1.1 scottr sleep(60 * 60);
447 1.1 scottr }
448 1.1 scottr exit(0);
449 1.1 scottr }
450