statd.c revision 1.1 1 1.1 scottr /* $NetBSD: statd.c,v 1.1 1997/03/10 06:28:31 scottr Exp $ */
2 1.1 scottr
3 1.1 scottr /*
4 1.1 scottr * Copyright (c) 1995
5 1.1 scottr * A.R. Gordon (andrew.gordon (at) net-tel.co.uk). All rights reserved.
6 1.1 scottr *
7 1.1 scottr * Redistribution and use in source and binary forms, with or without
8 1.1 scottr * modification, are permitted provided that the following conditions
9 1.1 scottr * are met:
10 1.1 scottr * 1. Redistributions of source code must retain the above copyright
11 1.1 scottr * notice, this list of conditions and the following disclaimer.
12 1.1 scottr * 2. Redistributions in binary form must reproduce the above copyright
13 1.1 scottr * notice, this list of conditions and the following disclaimer in the
14 1.1 scottr * documentation and/or other materials provided with the distribution.
15 1.1 scottr * 3. All advertising materials mentioning features or use of this software
16 1.1 scottr * must display the following acknowledgement:
17 1.1 scottr * This product includes software developed for the FreeBSD project
18 1.1 scottr * 4. Neither the name of the author nor the names of any co-contributors
19 1.1 scottr * may be used to endorse or promote products derived from this software
20 1.1 scottr * without specific prior written permission.
21 1.1 scottr *
22 1.1 scottr * THIS SOFTWARE IS PROVIDED BY ANDREW GORDON AND CONTRIBUTORS ``AS IS'' AND
23 1.1 scottr * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 1.1 scottr * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 1.1 scottr * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26 1.1 scottr * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 1.1 scottr * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 1.1 scottr * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 1.1 scottr * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 1.1 scottr * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 1.1 scottr * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 1.1 scottr * SUCH DAMAGE.
33 1.1 scottr *
34 1.1 scottr */
35 1.1 scottr
36 1.1 scottr
37 1.1 scottr /* main() function for status monitor daemon. Some of the code in this */
38 1.1 scottr /* file was generated by running rpcgen /usr/include/rpcsvc/sm_inter.x */
39 1.1 scottr /* The actual program logic is in the file procs.c */
40 1.1 scottr
41 1.1 scottr #include <stdio.h>
42 1.1 scottr #include <errno.h>
43 1.1 scottr #include <fcntl.h>
44 1.1 scottr #include <signal.h>
45 1.1 scottr #include <string.h>
46 1.1 scottr #include <syslog.h>
47 1.1 scottr #include <unistd.h>
48 1.1 scottr #include <sys/types.h>
49 1.1 scottr #include <sys/mman.h>
50 1.1 scottr #include <sys/wait.h>
51 1.1 scottr
52 1.1 scottr #include <rpc/rpc.h>
53 1.1 scottr
54 1.1 scottr #include "statd.h"
55 1.1 scottr
#ifndef lint
/* RCS identification string; compiled out when linting. */
static char rcsid[] = "$NetBSD: statd.c,v 1.1 1997/03/10 06:28:31 scottr Exp $";
#endif /* not lint */

/*
 * File-scope state.  debug, _rpcsvcdirty and status_info have external
 * linkage; the descriptor and length of the status file are private to
 * this file.
 */
int debug = 0;			/* Non-zero (set by -d) enables syslog() debug msgs */
int _rpcsvcdirty = 0;		/* Not referenced in this file; presumably wanted by
				 * rpcgen-generated service code - TODO confirm */
FileLayout *status_info;	/* Pointer to mmap()ed status file (set by init_file) */
static int status_fd;		/* File descriptor for the open status file */
static off_t status_file_len;	/* Current on-disc length of the status file */

/* RPC dispatch routine for SM_PROG version 1; defined outside this file. */
extern void sm_prog_1(struct svc_req * rqstp, SVCXPRT * transp);
/* SIGCHLD handler, defined below. */
static void handle_sigchld();
68 1.1 scottr
69 1.1 scottr main(argc, argv)
70 1.1 scottr int argc;
71 1.1 scottr char **argv;
72 1.1 scottr {
73 1.1 scottr SVCXPRT *transp;
74 1.1 scottr struct sigaction sa;
75 1.1 scottr int ch;
76 1.1 scottr
77 1.1 scottr while ((ch = getopt(argc, argv, "d")) != (-1)) {
78 1.1 scottr switch (ch) {
79 1.1 scottr case 'd':
80 1.1 scottr debug = 1;
81 1.1 scottr break;
82 1.1 scottr default:
83 1.1 scottr case '?':
84 1.1 scottr errx(1, "usage: rpc.statd [-d]");
85 1.1 scottr /* NOTREACHED */
86 1.1 scottr }
87 1.1 scottr }
88 1.1 scottr (void)pmap_unset(SM_PROG, SM_VERS);
89 1.1 scottr
90 1.1 scottr transp = svcudp_create(RPC_ANYSOCK);
91 1.1 scottr if (transp == NULL) {
92 1.1 scottr errx(1, "cannot create udp service.");
93 1.1 scottr /* NOTREACHED */
94 1.1 scottr }
95 1.1 scottr if (!svc_register(transp, SM_PROG, SM_VERS, sm_prog_1, IPPROTO_UDP)) {
96 1.1 scottr errx(1, "unable to register (SM_PROG, SM_VERS, udp).");
97 1.1 scottr /* NOTREACHED */
98 1.1 scottr }
99 1.1 scottr transp = svctcp_create(RPC_ANYSOCK, 0, 0);
100 1.1 scottr if (transp == NULL) {
101 1.1 scottr errx(1, "cannot create tcp service.");
102 1.1 scottr /* NOTREACHED */
103 1.1 scottr }
104 1.1 scottr if (!svc_register(transp, SM_PROG, SM_VERS, sm_prog_1, IPPROTO_TCP)) {
105 1.1 scottr errx(1, "unable to register (SM_PROG, SM_VERS, tcp).");
106 1.1 scottr /* NOTREACHED */
107 1.1 scottr }
108 1.1 scottr init_file("/var/db/statd.status");
109 1.1 scottr
110 1.1 scottr /*
111 1.1 scottr * Note that it is NOT sensible to run this program from inetd - the
112 1.1 scottr * protocol assumes that it will run immediately at boot time.
113 1.1 scottr */
114 1.1 scottr daemon(0, 0);
115 1.1 scottr openlog("rpc.statd", 0, LOG_DAEMON);
116 1.1 scottr if (debug)
117 1.1 scottr syslog(LOG_INFO, "Starting - debug enabled");
118 1.1 scottr else
119 1.1 scottr syslog(LOG_INFO, "Starting");
120 1.1 scottr
121 1.1 scottr /* Install signal handler to collect exit status of child processes */
122 1.1 scottr sa.sa_handler = handle_sigchld;
123 1.1 scottr sigemptyset(&sa.sa_mask);
124 1.1 scottr sigaddset(&sa.sa_mask, SIGCHLD);
125 1.1 scottr sa.sa_flags = SA_RESTART;
126 1.1 scottr sigaction(SIGCHLD, &sa, NULL);
127 1.1 scottr
128 1.1 scottr /* Initialisation now complete - start operating */
129 1.1 scottr
130 1.1 scottr /*
131 1.1 scottr * notify_hosts() forks a process (if necessary) to do the
132 1.1 scottr * SM_NOTIFY calls, which may be slow.
133 1.1 scottr */
134 1.1 scottr notify_hosts();
135 1.1 scottr
136 1.1 scottr svc_run(); /* Should never return */
137 1.1 scottr exit(1);
138 1.1 scottr }
139 1.1 scottr
140 1.1 scottr /* handle_sigchld ---------------------------------------------------------- */
141 1.1 scottr /*
142 1.1 scottr * Purpose: Catch SIGCHLD and collect process status
143 1.1 scottr * Returns: Nothing.
144 1.1 scottr * Notes: No special action required, other than to collect the
145 1.1 scottr * process status and hence allow the child to die:
146 1.1 scottr * we only use child processes for asynchronous transmission
147 1.1 scottr * of SM_NOTIFY to other systems, so it is normal for the
148 1.1 scottr * children to exit when they have done their work.
149 1.1 scottr */
150 1.1 scottr static void
151 1.1 scottr handle_sigchld(sig, code, scp)
152 1.1 scottr int sig, code;
153 1.1 scottr struct sigcontext *scp;
154 1.1 scottr {
155 1.1 scottr int pid, status;
156 1.1 scottr pid = wait4(-1, &status, WNOHANG, (struct rusage *) 0);
157 1.1 scottr if (!pid)
158 1.1 scottr syslog(LOG_ERR, "Phantom SIGCHLD??");
159 1.1 scottr else if (status)
160 1.1 scottr syslog(LOG_ERR, "Child %d failed with status %d", pid,
161 1.1 scottr WEXITSTATUS(status));
162 1.1 scottr else if (debug)
163 1.1 scottr syslog(LOG_DEBUG, "Child %d exited OK", pid);
164 1.1 scottr }
165 1.1 scottr
166 1.1 scottr /* sync_file --------------------------------------------------------------- */
167 1.1 scottr /*
168 1.1 scottr * Purpose: Packaged call of msync() to flush changes to mmap()ed file
169 1.1 scottr * Returns: Nothing. Errors to syslog.
170 1.1 scottr */
171 1.1 scottr void
172 1.1 scottr sync_file()
173 1.1 scottr {
174 1.1 scottr if (msync((void *)status_info, 0) < 0)
175 1.1 scottr syslog(LOG_ERR, "msync() failed: %s", strerror(errno));
176 1.1 scottr }
177 1.1 scottr
178 1.1 scottr /* find_host -------------------------------------------------------------- */
179 1.1 scottr /*
180 1.1 scottr * Purpose: Find the entry in the status file for a given host
181 1.1 scottr * Returns: Pointer to that entry in the mmap() region, or NULL.
182 1.1 scottr * Notes: Also creates entries if requested.
183 1.1 scottr * Failure to create also returns NULL.
184 1.1 scottr */
185 1.1 scottr HostInfo *
186 1.1 scottr find_host(hostname, create)
187 1.1 scottr char *hostname;
188 1.1 scottr int create;
189 1.1 scottr {
190 1.1 scottr HostInfo *hp;
191 1.1 scottr HostInfo *spare_slot = NULL;
192 1.1 scottr HostInfo *result = NULL;
193 1.1 scottr int i;
194 1.1 scottr
195 1.1 scottr for (i = 0, hp = status_info->hosts; i < status_info->noOfHosts;
196 1.1 scottr i++, hp++) {
197 1.1 scottr if (!strncasecmp(hostname, hp->hostname, SM_MAXSTRLEN)) {
198 1.1 scottr result = hp;
199 1.1 scottr break;
200 1.1 scottr }
201 1.1 scottr if (!spare_slot && !hp->monList && !hp->notifyReqd)
202 1.1 scottr spare_slot = hp;
203 1.1 scottr }
204 1.1 scottr
205 1.1 scottr /* Return if entry found, or if not asked to create one. */
206 1.1 scottr if (result || !create)
207 1.1 scottr return (result);
208 1.1 scottr
209 1.1 scottr /*
210 1.1 scottr * Now create an entry, using the spare slot if one was found or
211 1.1 scottr * adding to the end of the list otherwise, extending file if req'd
212 1.1 scottr */
213 1.1 scottr if (!spare_slot) {
214 1.1 scottr off_t desired_size;
215 1.1 scottr spare_slot = &status_info->hosts[status_info->noOfHosts];
216 1.1 scottr desired_size = ((char *)spare_slot - (char *)status_info) +
217 1.1 scottr sizeof(HostInfo);
218 1.1 scottr
219 1.1 scottr if (desired_size > status_file_len) {
220 1.1 scottr /* Extend file by writing 1 byte of junk at the
221 1.1 scottr * desired end pos */
222 1.1 scottr lseek(status_fd, desired_size - 1, SEEK_SET);
223 1.1 scottr i = write(status_fd, &i, 1);
224 1.1 scottr if (i < 1) {
225 1.1 scottr syslog(LOG_ERR, "Unable to extend status file");
226 1.1 scottr return (NULL);
227 1.1 scottr }
228 1.1 scottr status_file_len = desired_size;
229 1.1 scottr }
230 1.1 scottr status_info->noOfHosts++;
231 1.1 scottr }
232 1.1 scottr /*
233 1.1 scottr * Initialise the spare slot that has been found/created
234 1.1 scottr * Note that we do not msync(), since the caller is presumed to be
235 1.1 scottr * about to modify the entry further
236 1.1 scottr */
237 1.1 scottr memset(spare_slot, 0, sizeof(HostInfo));
238 1.1 scottr strncpy(spare_slot->hostname, hostname, SM_MAXSTRLEN);
239 1.1 scottr return (spare_slot);
240 1.1 scottr }
241 1.1 scottr
242 1.1 scottr /* init_file -------------------------------------------------------------- */
243 1.1 scottr /*
244 1.1 scottr * Purpose: Open file, create if necessary, initialise it.
245 1.1 scottr * Returns: Nothing - exits on error
246 1.1 scottr * Notes: Called before process becomes daemon, hence logs to
247 1.1 scottr * stderr rather than syslog.
248 1.1 scottr * Opens the file, then mmap()s it for ease of access.
249 1.1 scottr * Also performs initial clean-up of the file, zeroing
250 1.1 scottr * monitor list pointers, setting the notifyReqd flag in
251 1.1 scottr * all hosts that had a monitor list, and incrementing
252 1.1 scottr * the state number to the next even value.
253 1.1 scottr */
254 1.1 scottr void
255 1.1 scottr init_file(filename)
256 1.1 scottr char *filename;
257 1.1 scottr {
258 1.1 scottr char buf[HEADER_LEN];
259 1.1 scottr int new_file = FALSE;
260 1.1 scottr int i;
261 1.1 scottr
262 1.1 scottr /* try to open existing file - if not present, create one */
263 1.1 scottr status_fd = open(filename, O_RDWR);
264 1.1 scottr if ((status_fd < 0) && (errno == ENOENT)) {
265 1.1 scottr status_fd = open(filename, O_RDWR | O_CREAT, 0644);
266 1.1 scottr new_file = TRUE;
267 1.1 scottr }
268 1.1 scottr if (status_fd < 0) {
269 1.1 scottr err(1, "unable to open status file %s", filename);
270 1.1 scottr /* NOTREACHED */
271 1.1 scottr }
272 1.1 scottr
273 1.1 scottr /*
274 1.1 scottr * File now open. mmap() it, with a generous size to allow for
275 1.1 scottr * later growth, where we will extend the file but not re-map it.
276 1.1 scottr */
277 1.1 scottr status_info = (FileLayout *)mmap(NULL, 0x10000000,
278 1.1 scottr PROT_READ | PROT_WRITE, MAP_SHARED, status_fd, 0);
279 1.1 scottr
280 1.1 scottr if (status_info == (FileLayout *)(-1)) {
281 1.1 scottr perror("rpc.statd");
282 1.1 scottr fprintf(stderr, "Unable to mmap() status file\n");
283 1.1 scottr }
284 1.1 scottr status_file_len = lseek(status_fd, 0L, SEEK_END);
285 1.1 scottr
286 1.1 scottr /*
287 1.1 scottr * If the file was not newly created, validate the contents, and if
288 1.1 scottr * defective, re-create from scratch.
289 1.1 scottr */
290 1.1 scottr if (!new_file) {
291 1.1 scottr if ((status_file_len < HEADER_LEN) || (status_file_len <
292 1.1 scottr (HEADER_LEN + sizeof(HostInfo) * status_info->noOfHosts))) {
293 1.1 scottr fprintf(stderr, "rpc.statd: status file is corrupt\n");
294 1.1 scottr new_file = TRUE;
295 1.1 scottr }
296 1.1 scottr }
297 1.1 scottr /* Initialisation of a new, empty file. */
298 1.1 scottr if (new_file) {
299 1.1 scottr memset(buf, 0, sizeof(buf));
300 1.1 scottr lseek(status_fd, 0L, SEEK_SET);
301 1.1 scottr write(status_fd, buf, HEADER_LEN);
302 1.1 scottr status_file_len = HEADER_LEN;
303 1.1 scottr } else {
304 1.1 scottr /*
305 1.1 scottr * Clean-up of existing file - monitored hosts will have a
306 1.1 scottr * pointer to a list of clients, which refers to memory in
307 1.1 scottr * the previous incarnation of the program and so are
308 1.1 scottr * meaningless now. These pointers are zeroed and the fact
309 1.1 scottr * that the host was previously monitored is recorded by
310 1.1 scottr * setting the notifyReqd flag, which will in due course
311 1.1 scottr * cause a SM_NOTIFY to be sent.
312 1.1 scottr *
313 1.1 scottr * Note that if we crash twice in quick succession, some hosts
314 1.1 scottr * may already have notifyReqd set, where we didn't manage to
315 1.1 scottr * notify them before the second crash occurred.
316 1.1 scottr */
317 1.1 scottr for (i = 0; i < status_info->noOfHosts; i++) {
318 1.1 scottr HostInfo *this_host = &status_info->hosts[i];
319 1.1 scottr
320 1.1 scottr if (this_host->monList) {
321 1.1 scottr this_host->notifyReqd = TRUE;
322 1.1 scottr this_host->monList = NULL;
323 1.1 scottr }
324 1.1 scottr }
325 1.1 scottr /* Select the next higher even number for the state counter */
326 1.1 scottr status_info->ourState =
327 1.1 scottr (status_info->ourState + 2) & 0xfffffffe;
328 1.1 scottr status_info->ourState++; /* XXX - ??? */
329 1.1 scottr }
330 1.1 scottr }
331 1.1 scottr
332 1.1 scottr /* notify_one_host --------------------------------------------------------- */
333 1.1 scottr /*
334 1.1 scottr * Purpose: Perform SM_NOTIFY procedure at specified host
335 1.1 scottr * Returns: TRUE if success, FALSE if failed.
336 1.1 scottr */
337 1.1 scottr static int
338 1.1 scottr notify_one_host(hostname)
339 1.1 scottr char *hostname;
340 1.1 scottr {
341 1.1 scottr struct timeval timeout = {20, 0}; /* 20 secs timeout */
342 1.1 scottr CLIENT *cli;
343 1.1 scottr char dummy;
344 1.1 scottr stat_chge arg;
345 1.1 scottr char our_hostname[SM_MAXSTRLEN + 1];
346 1.1 scottr
347 1.1 scottr gethostname(our_hostname, sizeof(our_hostname));
348 1.1 scottr our_hostname[SM_MAXSTRLEN] = '\0';
349 1.1 scottr arg.mon_name = our_hostname;
350 1.1 scottr arg.state = status_info->ourState;
351 1.1 scottr
352 1.1 scottr if (debug)
353 1.1 scottr syslog(LOG_DEBUG, "Sending SM_NOTIFY to host %s from %s",
354 1.1 scottr hostname, our_hostname);
355 1.1 scottr
356 1.1 scottr cli = clnt_create(hostname, SM_PROG, SM_VERS, "udp");
357 1.1 scottr if (!cli) {
358 1.1 scottr syslog(LOG_ERR, "Failed to contact host %s%s", hostname,
359 1.1 scottr clnt_spcreateerror(""));
360 1.1 scottr return (FALSE);
361 1.1 scottr }
362 1.1 scottr if (clnt_call(cli, SM_NOTIFY, xdr_stat_chge, &arg, xdr_void,
363 1.1 scottr &dummy, timeout) != RPC_SUCCESS) {
364 1.1 scottr syslog(LOG_ERR, "Failed to contact rpc.statd at host %s",
365 1.1 scottr hostname);
366 1.1 scottr clnt_destroy(cli);
367 1.1 scottr return (FALSE);
368 1.1 scottr }
369 1.1 scottr clnt_destroy(cli);
370 1.1 scottr return (TRUE);
371 1.1 scottr }
372 1.1 scottr
373 1.1 scottr /* notify_hosts ------------------------------------------------------------ */
374 1.1 scottr /*
375 1.1 scottr * Purpose: Send SM_NOTIFY to all hosts marked as requiring it
376 1.1 scottr * Returns: Nothing, immediately - forks a process to do the work.
377 1.1 scottr * Notes: Does nothing if there are no monitored hosts.
378 1.1 scottr * Called after all the initialisation has been done -
379 1.1 scottr * logs to syslog.
380 1.1 scottr */
381 1.1 scottr void
382 1.1 scottr notify_hosts(void)
383 1.1 scottr {
384 1.1 scottr HostInfo *hp;
385 1.1 scottr int i, attempts;
386 1.1 scottr int work_to_do = FALSE;
387 1.1 scottr pid_t pid;
388 1.1 scottr
389 1.1 scottr /* First check if there is in fact any work to do. */
390 1.1 scottr for (i = status_info->noOfHosts, hp = status_info->hosts; i;
391 1.1 scottr i--, hp++) {
392 1.1 scottr if (hp->notifyReqd) {
393 1.1 scottr work_to_do = TRUE;
394 1.1 scottr break;
395 1.1 scottr }
396 1.1 scottr }
397 1.1 scottr
398 1.1 scottr if (!work_to_do)
399 1.1 scottr return; /* No work found */
400 1.1 scottr
401 1.1 scottr pid = fork();
402 1.1 scottr if (pid == -1) {
403 1.1 scottr syslog(LOG_ERR, "Unable to fork notify process - %s",
404 1.1 scottr strerror(errno));
405 1.1 scottr return;
406 1.1 scottr }
407 1.1 scottr if (pid)
408 1.1 scottr return;
409 1.1 scottr
410 1.1 scottr /*
411 1.1 scottr * Here in the child process. We continue until all the hosts marked
412 1.1 scottr * as requiring notification have been duly notified.
413 1.1 scottr * If one of the initial attempts fails, we sleep for a while and
414 1.1 scottr * have another go. This is necessary because when we have crashed,
415 1.1 scottr * (eg. a power outage) it is quite possible that we won't be able to
416 1.1 scottr * contact all monitored hosts immediately on restart, either because
417 1.1 scottr * they crashed too and take longer to come up (in which case the
418 1.1 scottr * notification isn't really required), or more importantly if some
419 1.1 scottr * router etc. needed to reach the monitored host has not come back
420 1.1 scottr * up yet. In this case, we will be a bit late in re-establishing
421 1.1 scottr * locks (after the grace period) but that is the best we can do.
422 1.1 scottr * We try 10 times at 5 sec intervals, 10 more times at 1 minute
423 1.1 scottr * intervals, then 24 more times at hourly intervals, finally
424 1.1 scottr * giving up altogether if the host hasn't come back to life after
425 1.1 scottr * 24 hours.
426 1.1 scottr */
427 1.1 scottr for (attempts = 0; attempts < 44; attempts++) {
428 1.1 scottr work_to_do = FALSE; /* Unless anything fails */
429 1.1 scottr for (i = status_info->noOfHosts, hp = status_info->hosts; i > 0;
430 1.1 scottr i--, hp++) {
431 1.1 scottr if (hp->notifyReqd) {
432 1.1 scottr if (notify_one_host(hp->hostname)) {
433 1.1 scottr hp->notifyReqd = FALSE;
434 1.1 scottr sync_file();
435 1.1 scottr } else
436 1.1 scottr work_to_do = TRUE;
437 1.1 scottr }
438 1.1 scottr }
439 1.1 scottr if (!work_to_do)
440 1.1 scottr break;
441 1.1 scottr if (attempts < 10)
442 1.1 scottr sleep(5);
443 1.1 scottr else
444 1.1 scottr if (attempts < 20)
445 1.1 scottr sleep(60);
446 1.1 scottr else
447 1.1 scottr sleep(60 * 60);
448 1.1 scottr }
449 1.1 scottr exit(0);
450 1.1 scottr }
451