ip_icmp.c revision 1.70 1 /* $NetBSD: ip_icmp.c,v 1.70 2002/08/14 00:23:30 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.70 2002/08/14 00:23:30 itojun Exp $");
109
110 #include "opt_ipsec.h"
111
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/malloc.h>
115 #include <sys/mbuf.h>
116 #include <sys/protosw.h>
117 #include <sys/socket.h>
118 #include <sys/time.h>
119 #include <sys/kernel.h>
120 #include <sys/syslog.h>
121 #include <sys/sysctl.h>
122
123 #include <net/if.h>
124 #include <net/route.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139
140 #include <machine/stdarg.h>
141
142 /*
143 * ICMP routines: error generation, receive packet processing, and
144 * routines to turnaround packets back to the originator, and
145 * host table maintenance routines.
146 */
147
148 int icmpmaskrepl = 0;
149 #ifdef ICMPPRINTFS
150 int icmpprintfs = 0;
151 #endif
152 int icmpreturndatabytes = 8;
153
154 /*
155 * List of callbacks to notify when Path MTU changes are made.
156 */
157 struct icmp_mtudisc_callback {
158 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
159 void (*mc_func) __P((struct in_addr));
160 };
161
162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
163 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
164
165 #if 0
166 static int ip_next_mtu __P((int, int));
167 #else
168 /*static*/ int ip_next_mtu __P((int, int));
169 #endif
170
171 extern int icmperrppslim;
172 static int icmperrpps_count = 0;
173 static struct timeval icmperrppslim_last;
174 static int icmp_rediraccept = 1;
175 static int icmp_redirtimeout = 600;
176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
177
178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
180
181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
182
183
184 void
185 icmp_init()
186 {
187 /*
188 * This is only useful if the user initializes redirtimeout to
189 * something other than zero.
190 */
191 if (icmp_redirtimeout != 0) {
192 icmp_redirect_timeout_q =
193 rt_timer_queue_create(icmp_redirtimeout);
194 }
195 }
196
197 /*
198 * Register a Path MTU Discovery callback.
199 */
200 void
201 icmp_mtudisc_callback_register(func)
202 void (*func) __P((struct in_addr));
203 {
204 struct icmp_mtudisc_callback *mc;
205
206 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
207 mc = LIST_NEXT(mc, mc_list)) {
208 if (mc->mc_func == func)
209 return;
210 }
211
212 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
213 if (mc == NULL)
214 panic("icmp_mtudisc_callback_register");
215
216 mc->mc_func = func;
217 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
218 }
219
220 /*
221 * Generate an error packet of type error
222 * in response to bad packet ip.
223 */
224 void
225 icmp_error(n, type, code, dest, destifp)
226 struct mbuf *n;
227 int type, code;
228 n_long dest;
229 struct ifnet *destifp;
230 {
231 struct ip *oip = mtod(n, struct ip *), *nip;
232 unsigned oiplen = oip->ip_hl << 2;
233 struct icmp *icp;
234 struct mbuf *m;
235 unsigned icmplen, mblen;
236
237 #ifdef ICMPPRINTFS
238 if (icmpprintfs)
239 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
240 #endif
241 if (type != ICMP_REDIRECT)
242 icmpstat.icps_error++;
243 /*
244 * Don't send error if the original packet was encrypted.
245 * Don't send error if not the first fragment of message.
246 * Don't error if the old packet protocol was ICMP
247 * error message, only known informational types.
248 */
249 if (n->m_flags & M_DECRYPTED)
250 goto freeit;
251 if (oip->ip_off &~ htons(IP_MF|IP_DF))
252 goto freeit;
253 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
254 n->m_len >= oiplen + ICMP_MINLEN &&
255 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
256 icmpstat.icps_oldicmp++;
257 goto freeit;
258 }
259 /* Don't send error in response to a multicast or broadcast packet */
260 if (n->m_flags & (M_BCAST|M_MCAST))
261 goto freeit;
262
263 /*
264 * First, do a rate limitation check.
265 */
266 if (icmp_ratelimit(&oip->ip_src, type, code)) {
267 /* XXX stat */
268 goto freeit;
269 }
270
271 /*
272 * Now, formulate icmp message
273 */
274 icmplen = oiplen + min(icmpreturndatabytes,
275 ntohs(oip->ip_len) - oiplen);
276 /*
277 * Defend against mbuf chains shorter than oip->ip_len:
278 */
279 mblen = 0;
280 for (m = n; m && (mblen < icmplen); m = m->m_next)
281 mblen += m->m_len;
282 icmplen = min(mblen, icmplen);
283
284 /*
285 * As we are not required to return everything we have,
286 * we return whatever we can return at ease.
287 *
288 * Note that ICMP datagrams longer than 576 octets are out of spec
289 * according to RFC1812; the limit on icmpreturndatabytes below in
290 * icmp_sysctl will keep things below that limit.
291 */
292
293 KASSERT(ICMP_MINLEN <= MCLBYTES);
294
295 if (icmplen + ICMP_MINLEN > MCLBYTES)
296 icmplen = MCLBYTES - ICMP_MINLEN;
297
298 m = m_gethdr(M_DONTWAIT, MT_HEADER);
299 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
300 MCLGET(m, M_DONTWAIT);
301 if ((m->m_flags & M_EXT) == 0) {
302 m_freem(m);
303 m = NULL;
304 }
305 }
306 if (m == NULL)
307 goto freeit;
308 m->m_len = icmplen + ICMP_MINLEN;
309 if ((m->m_flags & M_EXT) == 0)
310 MH_ALIGN(m, m->m_len);
311 icp = mtod(m, struct icmp *);
312 if ((u_int)type > ICMP_MAXTYPE)
313 panic("icmp_error");
314 icmpstat.icps_outhist[type]++;
315 icp->icmp_type = type;
316 if (type == ICMP_REDIRECT)
317 icp->icmp_gwaddr.s_addr = dest;
318 else {
319 icp->icmp_void = 0;
320 /*
321 * The following assignments assume an overlay with the
322 * zeroed icmp_void field.
323 */
324 if (type == ICMP_PARAMPROB) {
325 icp->icmp_pptr = code;
326 code = 0;
327 } else if (type == ICMP_UNREACH &&
328 code == ICMP_UNREACH_NEEDFRAG && destifp)
329 icp->icmp_nextmtu = htons(destifp->if_mtu);
330 }
331
332 icp->icmp_code = code;
333 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
334 nip = &icp->icmp_ip;
335
336 /*
337 * Now, copy old ip header (without options)
338 * in front of icmp message.
339 */
340 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
341 panic("icmp len");
342 m->m_data -= sizeof(struct ip);
343 m->m_len += sizeof(struct ip);
344 m->m_pkthdr.len = m->m_len;
345 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
346 nip = mtod(m, struct ip *);
347 /* ip_v set in ip_output */
348 nip->ip_hl = sizeof(struct ip) >> 2;
349 nip->ip_tos = 0;
350 nip->ip_len = htons(m->m_len);
351 /* ip_id set in ip_output */
352 nip->ip_off = htons(0);
353 /* ip_ttl set in icmp_reflect */
354 nip->ip_p = IPPROTO_ICMP;
355 nip->ip_src = oip->ip_src;
356 nip->ip_dst = oip->ip_dst;
357 icmp_reflect(m);
358
359 freeit:
360 m_freem(n);
361 }
362
363 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
364 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
365 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
366 struct sockaddr_in icmpmask = { 8, 0 };
367
368 /*
369 * Process a received ICMP message.
370 */
371 void
372 #if __STDC__
373 icmp_input(struct mbuf *m, ...)
374 #else
375 icmp_input(m, va_alist)
376 struct mbuf *m;
377 va_dcl
378 #endif
379 {
380 int proto;
381 struct icmp *icp;
382 struct ip *ip = mtod(m, struct ip *);
383 int icmplen;
384 int i;
385 struct in_ifaddr *ia;
386 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
387 int code;
388 int hlen;
389 va_list ap;
390 struct rtentry *rt;
391
392 va_start(ap, m);
393 hlen = va_arg(ap, int);
394 proto = va_arg(ap, int);
395 va_end(ap);
396
397 /*
398 * Locate icmp structure in mbuf, and check
399 * that not corrupted and of at least minimum length.
400 */
401 icmplen = ntohs(ip->ip_len) - hlen;
402 #ifdef ICMPPRINTFS
403 if (icmpprintfs)
404 printf("icmp_input from %x to %x, len %d\n",
405 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
406 icmplen);
407 #endif
408 if (icmplen < ICMP_MINLEN) {
409 icmpstat.icps_tooshort++;
410 goto freeit;
411 }
412 i = hlen + min(icmplen, ICMP_ADVLENMIN);
413 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
414 icmpstat.icps_tooshort++;
415 return;
416 }
417 ip = mtod(m, struct ip *);
418 m->m_len -= hlen;
419 m->m_data += hlen;
420 icp = mtod(m, struct icmp *);
421 /* Don't need to assert alignment, here. */
422 if (in_cksum(m, icmplen)) {
423 icmpstat.icps_checksum++;
424 goto freeit;
425 }
426 m->m_len += hlen;
427 m->m_data -= hlen;
428
429 #ifdef ICMPPRINTFS
430 /*
431 * Message type specific processing.
432 */
433 if (icmpprintfs)
434 printf("icmp_input, type %d code %d\n", icp->icmp_type,
435 icp->icmp_code);
436 #endif
437 if (icp->icmp_type > ICMP_MAXTYPE)
438 goto raw;
439 icmpstat.icps_inhist[icp->icmp_type]++;
440 code = icp->icmp_code;
441 switch (icp->icmp_type) {
442
443 case ICMP_UNREACH:
444 switch (code) {
445 case ICMP_UNREACH_NET:
446 case ICMP_UNREACH_HOST:
447 case ICMP_UNREACH_PROTOCOL:
448 case ICMP_UNREACH_PORT:
449 case ICMP_UNREACH_SRCFAIL:
450 code += PRC_UNREACH_NET;
451 break;
452
453 case ICMP_UNREACH_NEEDFRAG:
454 code = PRC_MSGSIZE;
455 break;
456
457 case ICMP_UNREACH_NET_UNKNOWN:
458 case ICMP_UNREACH_NET_PROHIB:
459 case ICMP_UNREACH_TOSNET:
460 code = PRC_UNREACH_NET;
461 break;
462
463 case ICMP_UNREACH_HOST_UNKNOWN:
464 case ICMP_UNREACH_ISOLATED:
465 case ICMP_UNREACH_HOST_PROHIB:
466 case ICMP_UNREACH_TOSHOST:
467 code = PRC_UNREACH_HOST;
468 break;
469
470 default:
471 goto badcode;
472 }
473 goto deliver;
474
475 case ICMP_TIMXCEED:
476 if (code > 1)
477 goto badcode;
478 code += PRC_TIMXCEED_INTRANS;
479 goto deliver;
480
481 case ICMP_PARAMPROB:
482 if (code > 1)
483 goto badcode;
484 code = PRC_PARAMPROB;
485 goto deliver;
486
487 case ICMP_SOURCEQUENCH:
488 if (code)
489 goto badcode;
490 code = PRC_QUENCH;
491 goto deliver;
492
493 deliver:
494 /*
495 * Problem with datagram; advise higher level routines.
496 */
497 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
498 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
499 icmpstat.icps_badlen++;
500 goto freeit;
501 }
502 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
503 goto badcode;
504 #ifdef ICMPPRINTFS
505 if (icmpprintfs)
506 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
507 #endif
508 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
509 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
510 if (ctlfunc)
511 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
512 &icp->icmp_ip);
513 break;
514
515 badcode:
516 icmpstat.icps_badcode++;
517 break;
518
519 case ICMP_ECHO:
520 icp->icmp_type = ICMP_ECHOREPLY;
521 goto reflect;
522
523 case ICMP_TSTAMP:
524 if (icmplen < ICMP_TSLEN) {
525 icmpstat.icps_badlen++;
526 break;
527 }
528 icp->icmp_type = ICMP_TSTAMPREPLY;
529 icp->icmp_rtime = iptime();
530 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
531 goto reflect;
532
533 case ICMP_MASKREQ:
534 if (icmpmaskrepl == 0)
535 break;
536 /*
537 * We are not able to respond with all ones broadcast
538 * unless we receive it over a point-to-point interface.
539 */
540 if (icmplen < ICMP_MASKLEN) {
541 icmpstat.icps_badlen++;
542 break;
543 }
544 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
545 in_nullhost(ip->ip_dst))
546 icmpdst.sin_addr = ip->ip_src;
547 else
548 icmpdst.sin_addr = ip->ip_dst;
549 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
550 m->m_pkthdr.rcvif));
551 if (ia == 0)
552 break;
553 icp->icmp_type = ICMP_MASKREPLY;
554 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
555 if (in_nullhost(ip->ip_src)) {
556 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
557 ip->ip_src = ia->ia_broadaddr.sin_addr;
558 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
559 ip->ip_src = ia->ia_dstaddr.sin_addr;
560 }
561 reflect:
562 icmpstat.icps_reflect++;
563 icmpstat.icps_outhist[icp->icmp_type]++;
564 icmp_reflect(m);
565 return;
566
567 case ICMP_REDIRECT:
568 if (code > 3)
569 goto badcode;
570 if (icmp_rediraccept == 0)
571 goto freeit;
572 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
573 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
574 icmpstat.icps_badlen++;
575 break;
576 }
577 /*
578 * Short circuit routing redirects to force
579 * immediate change in the kernel's routing
580 * tables. The message is also handed to anyone
581 * listening on a raw socket (e.g. the routing
582 * daemon for use in updating its tables).
583 */
584 icmpgw.sin_addr = ip->ip_src;
585 icmpdst.sin_addr = icp->icmp_gwaddr;
586 #ifdef ICMPPRINTFS
587 if (icmpprintfs)
588 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
589 icp->icmp_gwaddr);
590 #endif
591 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
592 rt = NULL;
593 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
594 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
595 sintosa(&icmpgw), (struct rtentry **)&rt);
596 if (rt != NULL && icmp_redirtimeout != 0) {
597 i = rt_timer_add(rt, icmp_redirect_timeout,
598 icmp_redirect_timeout_q);
599 if (i)
600 log(LOG_ERR, "ICMP: redirect failed to "
601 "register timeout for route to %x, "
602 "code %d\n",
603 icp->icmp_ip.ip_dst.s_addr, i);
604 }
605 if (rt != NULL)
606 rtfree(rt);
607
608 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
609 #ifdef IPSEC
610 key_sa_routechange((struct sockaddr *)&icmpsrc);
611 #endif
612 break;
613
614 /*
615 * No kernel processing for the following;
616 * just fall through to send to raw listener.
617 */
618 case ICMP_ECHOREPLY:
619 case ICMP_ROUTERADVERT:
620 case ICMP_ROUTERSOLICIT:
621 case ICMP_TSTAMPREPLY:
622 case ICMP_IREQREPLY:
623 case ICMP_MASKREPLY:
624 default:
625 break;
626 }
627
628 raw:
629 rip_input(m, hlen, proto);
630 return;
631
632 freeit:
633 m_freem(m);
634 return;
635 }
636
637 /*
638 * Reflect the ip packet back to the source
639 */
640 void
641 icmp_reflect(m)
642 struct mbuf *m;
643 {
644 struct ip *ip = mtod(m, struct ip *);
645 struct in_ifaddr *ia;
646 struct ifaddr *ifa;
647 struct sockaddr_in *sin = 0;
648 struct in_addr t;
649 struct mbuf *opts = 0;
650 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
651
652 if (!in_canforward(ip->ip_src) &&
653 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
654 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
655 m_freem(m); /* Bad return address */
656 goto done; /* ip_output() will check for broadcast */
657 }
658 t = ip->ip_dst;
659 ip->ip_dst = ip->ip_src;
660 /*
661 * If the incoming packet was addressed directly to us, use
662 * dst as the src for the reply. Otherwise (broadcast or
663 * anonymous), use an address which corresponds to the
664 * incoming interface, with a preference for the address which
665 * corresponds to the route to the destination of the ICMP.
666 */
667
668 /* Look for packet addressed to us */
669 INADDR_TO_IA(t, ia);
670
671 /* look for packet sent to broadcast address */
672 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
673 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
674 if (ifa->ifa_addr->sa_family != AF_INET)
675 continue;
676 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
677 ia = ifatoia(ifa);
678 break;
679 }
680 }
681 }
682
683 if (ia)
684 sin = &ia->ia_addr;
685
686 icmpdst.sin_addr = t;
687
688 /*
689 * if the packet is addressed somewhere else, compute the
690 * source address for packets routed back to the source, and
691 * use that, if it's an address on the interface which
692 * received the packet
693 */
694 if (sin == (struct sockaddr_in *)0) {
695 struct sockaddr_in sin_dst;
696 struct route icmproute;
697 int errornum;
698
699 sin_dst.sin_family = AF_INET;
700 sin_dst.sin_len = sizeof(struct sockaddr_in);
701 sin_dst.sin_addr = ip->ip_dst;
702 bzero(&icmproute, sizeof(icmproute));
703 errornum = 0;
704 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
705 /* errornum is never used */
706 if (icmproute.ro_rt)
707 RTFREE(icmproute.ro_rt);
708 /* check to make sure sin is a source address on rcvif */
709 if (sin) {
710 t = sin->sin_addr;
711 sin = (struct sockaddr_in *)0;
712 INADDR_TO_IA(t, ia);
713 while (ia) {
714 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
715 sin = &ia->ia_addr;
716 break;
717 }
718 NEXT_IA_WITH_SAME_ADDR(ia);
719 }
720 }
721 }
722
723 /*
724 * if it was not addressed to us, but the route doesn't go out
725 * the source interface, pick an address on the source
726 * interface. This can happen when routing is asymmetric, or
727 * when the incoming packet was encapsulated
728 */
729 if (sin == (struct sockaddr_in *)0) {
730 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
731 if (ifa->ifa_addr->sa_family != AF_INET)
732 continue;
733 sin = &(ifatoia(ifa)->ia_addr);
734 break;
735 }
736 }
737
738 /*
739 * The following happens if the packet was not addressed to us,
740 * and was received on an interface with no IP address:
741 * We find the first AF_INET address on the first non-loopback
742 * interface.
743 */
744 if (sin == (struct sockaddr_in *)0)
745 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
746 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
747 continue;
748 sin = &ia->ia_addr;
749 break;
750 }
751
752 /*
753 * If we still didn't find an address, punt. We could have an
754 * interface up (and receiving packets) with no address.
755 */
756 if (sin == (struct sockaddr_in *)0) {
757 m_freem(m);
758 goto done;
759 }
760
761 ip->ip_src = sin->sin_addr;
762 ip->ip_ttl = MAXTTL;
763
764 if (optlen > 0) {
765 u_char *cp;
766 int opt, cnt;
767 u_int len;
768
769 /*
770 * Retrieve any source routing from the incoming packet;
771 * add on any record-route or timestamp options.
772 */
773 cp = (u_char *) (ip + 1);
774 if ((opts = ip_srcroute()) == 0 &&
775 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
776 opts->m_len = sizeof(struct in_addr);
777 *mtod(opts, struct in_addr *) = zeroin_addr;
778 }
779 if (opts) {
780 #ifdef ICMPPRINTFS
781 if (icmpprintfs)
782 printf("icmp_reflect optlen %d rt %d => ",
783 optlen, opts->m_len);
784 #endif
785 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
786 opt = cp[IPOPT_OPTVAL];
787 if (opt == IPOPT_EOL)
788 break;
789 if (opt == IPOPT_NOP)
790 len = 1;
791 else {
792 if (cnt < IPOPT_OLEN + sizeof(*cp))
793 break;
794 len = cp[IPOPT_OLEN];
795 if (len < IPOPT_OLEN + sizeof(*cp) ||
796 len > cnt)
797 break;
798 }
799 /*
800 * Should check for overflow, but it "can't happen"
801 */
802 if (opt == IPOPT_RR || opt == IPOPT_TS ||
803 opt == IPOPT_SECURITY) {
804 bcopy((caddr_t)cp,
805 mtod(opts, caddr_t) + opts->m_len, len);
806 opts->m_len += len;
807 }
808 }
809 /* Terminate & pad, if necessary */
810 if ((cnt = opts->m_len % 4) != 0) {
811 for (; cnt < 4; cnt++) {
812 *(mtod(opts, caddr_t) + opts->m_len) =
813 IPOPT_EOL;
814 opts->m_len++;
815 }
816 }
817 #ifdef ICMPPRINTFS
818 if (icmpprintfs)
819 printf("%d\n", opts->m_len);
820 #endif
821 }
822 /*
823 * Now strip out original options by copying rest of first
824 * mbuf's data back, and adjust the IP length.
825 */
826 ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
827 ip->ip_hl = sizeof(struct ip) >> 2;
828 m->m_len -= optlen;
829 if (m->m_flags & M_PKTHDR)
830 m->m_pkthdr.len -= optlen;
831 optlen += sizeof(struct ip);
832 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
833 (unsigned)(m->m_len - sizeof(struct ip)));
834 }
835 m->m_flags &= ~(M_BCAST|M_MCAST);
836 icmp_send(m, opts);
837 done:
838 if (opts)
839 (void)m_free(opts);
840 }
841
842 /*
843 * Send an icmp packet back to the ip level,
844 * after supplying a checksum.
845 */
846 void
847 icmp_send(m, opts)
848 struct mbuf *m;
849 struct mbuf *opts;
850 {
851 struct ip *ip = mtod(m, struct ip *);
852 int hlen;
853 struct icmp *icp;
854
855 hlen = ip->ip_hl << 2;
856 m->m_data += hlen;
857 m->m_len -= hlen;
858 icp = mtod(m, struct icmp *);
859 icp->icmp_cksum = 0;
860 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
861 m->m_data -= hlen;
862 m->m_len += hlen;
863 #ifdef ICMPPRINTFS
864 if (icmpprintfs)
865 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
866 #endif
867 #ifdef IPSEC
868 /* Don't lookup socket */
869 (void)ipsec_setsocket(m, NULL);
870 #endif
871 (void) ip_output(m, opts, NULL, 0, NULL);
872 }
873
874 n_time
875 iptime()
876 {
877 struct timeval atv;
878 u_long t;
879
880 microtime(&atv);
881 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
882 return (htonl(t));
883 }
884
885 int
886 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
887 int *name;
888 u_int namelen;
889 void *oldp;
890 size_t *oldlenp;
891 void *newp;
892 size_t newlen;
893 {
894 int arg, error;
895
896 /* All sysctl names at this level are terminal. */
897 if (namelen != 1)
898 return (ENOTDIR);
899
900 switch (name[0])
901 {
902 case ICMPCTL_MASKREPL:
903 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
904 break;
905 case ICMPCTL_RETURNDATABYTES:
906 arg = icmpreturndatabytes;
907 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
908 if (error)
909 break;
910 if ((arg >= 8) || (arg <= 512))
911 icmpreturndatabytes = arg;
912 else
913 error = EINVAL;
914 break;
915 case ICMPCTL_ERRPPSLIMIT:
916 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
917 break;
918 case ICMPCTL_REDIRACCEPT:
919 error = sysctl_int(oldp, oldlenp, newp, newlen,
920 &icmp_rediraccept);
921 break;
922 case ICMPCTL_REDIRTIMEOUT:
923 error = sysctl_int(oldp, oldlenp, newp, newlen,
924 &icmp_redirtimeout);
925 if (icmp_redirect_timeout_q != NULL) {
926 if (icmp_redirtimeout == 0) {
927 rt_timer_queue_destroy(icmp_redirect_timeout_q,
928 TRUE);
929 icmp_redirect_timeout_q = NULL;
930 } else {
931 rt_timer_queue_change(icmp_redirect_timeout_q,
932 icmp_redirtimeout);
933 }
934 } else if (icmp_redirtimeout > 0) {
935 icmp_redirect_timeout_q =
936 rt_timer_queue_create(icmp_redirtimeout);
937 }
938 return (error);
939
940 break;
941 default:
942 error = ENOPROTOOPT;
943 break;
944 }
945 return error;
946 }
947
948 /* Table of common MTUs: */
949
950 static const u_int mtu_table[] = {
951 65535, 65280, 32000, 17914, 9180, 8166,
952 4352, 2002, 1492, 1006, 508, 296, 68, 0
953 };
954
955 void
956 icmp_mtudisc(icp, faddr)
957 struct icmp *icp;
958 struct in_addr faddr;
959 {
960 struct icmp_mtudisc_callback *mc;
961 struct sockaddr *dst = sintosa(&icmpsrc);
962 struct rtentry *rt;
963 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
964 int error;
965
966 rt = rtalloc1(dst, 1);
967 if (rt == 0)
968 return;
969
970 /* If we didn't get a host route, allocate one */
971
972 if ((rt->rt_flags & RTF_HOST) == 0) {
973 struct rtentry *nrt;
974
975 error = rtrequest((int) RTM_ADD, dst,
976 (struct sockaddr *) rt->rt_gateway,
977 (struct sockaddr *) 0,
978 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
979 if (error) {
980 rtfree(rt);
981 return;
982 }
983 nrt->rt_rmx = rt->rt_rmx;
984 rtfree(rt);
985 rt = nrt;
986 }
987 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
988 if (error) {
989 rtfree(rt);
990 return;
991 }
992
993 if (mtu == 0) {
994 int i = 0;
995
996 mtu = ntohs(icp->icmp_ip.ip_len);
997 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
998 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
999 mtu -= (icp->icmp_ip.ip_hl << 2);
1000
1001 /* If we still can't guess a value, try the route */
1002
1003 if (mtu == 0) {
1004 mtu = rt->rt_rmx.rmx_mtu;
1005
1006 /* If no route mtu, default to the interface mtu */
1007
1008 if (mtu == 0)
1009 mtu = rt->rt_ifp->if_mtu;
1010 }
1011
1012 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1013 if (mtu > mtu_table[i]) {
1014 mtu = mtu_table[i];
1015 break;
1016 }
1017 }
1018
1019 /*
1020 * XXX: RTV_MTU is overloaded, since the admin can set it
1021 * to turn off PMTU for a route, and the kernel can
1022 * set it to indicate a serious problem with PMTU
1023 * on a route. We should be using a separate flag
1024 * for the kernel to indicate this.
1025 */
1026
1027 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1028 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1029 rt->rt_rmx.rmx_locks |= RTV_MTU;
1030 else if (rt->rt_rmx.rmx_mtu > mtu ||
1031 rt->rt_rmx.rmx_mtu == 0) {
1032 icmpstat.icps_pmtuchg++;
1033 rt->rt_rmx.rmx_mtu = mtu;
1034 }
1035 }
1036
1037 if (rt)
1038 rtfree(rt);
1039
1040 /*
1041 * Notify protocols that the MTU for this destination
1042 * has changed.
1043 */
1044 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1045 mc = LIST_NEXT(mc, mc_list))
1046 (*mc->mc_func)(faddr);
1047 }
1048
1049 /*
1050 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1051 * given current value MTU. If DIR is less than zero, a larger plateau
1052 * is returned; otherwise, a smaller value is returned.
1053 */
1054 int
1055 ip_next_mtu(mtu, dir) /* XXX */
1056 int mtu;
1057 int dir;
1058 {
1059 int i;
1060
1061 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1062 if (mtu >= mtu_table[i])
1063 break;
1064 }
1065
1066 if (dir < 0) {
1067 if (i == 0) {
1068 return 0;
1069 } else {
1070 return mtu_table[i - 1];
1071 }
1072 } else {
1073 if (mtu_table[i] == 0) {
1074 return 0;
1075 } else if (mtu > mtu_table[i]) {
1076 return mtu_table[i];
1077 } else {
1078 return mtu_table[i + 1];
1079 }
1080 }
1081 }
1082
1083 static void
1084 icmp_mtudisc_timeout(rt, r)
1085 struct rtentry *rt;
1086 struct rttimer *r;
1087 {
1088 if (rt == NULL)
1089 panic("icmp_mtudisc_timeout: bad route to timeout");
1090 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1091 (RTF_DYNAMIC | RTF_HOST)) {
1092 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1093 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1094 } else {
1095 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1096 rt->rt_rmx.rmx_mtu = 0;
1097 }
1098 }
1099 }
1100
1101 static void
1102 icmp_redirect_timeout(rt, r)
1103 struct rtentry *rt;
1104 struct rttimer *r;
1105 {
1106 if (rt == NULL)
1107 panic("icmp_redirect_timeout: bad route to timeout");
1108 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1109 (RTF_DYNAMIC | RTF_HOST)) {
1110 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1111 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1112 }
1113 }
1114
1115 /*
1116 * Perform rate limit check.
1117 * Returns 0 if it is okay to send the icmp packet.
1118 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1119 * limitation.
1120 *
1121 * XXX per-destination/type check necessary?
1122 */
1123 static int
1124 icmp_ratelimit(dst, type, code)
1125 const struct in_addr *dst;
1126 const int type; /* not used at this moment */
1127 const int code; /* not used at this moment */
1128 {
1129
1130 /* PPS limit */
1131 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1132 icmperrppslim)) {
1133 /* The packet is subject to rate limit */
1134 return 1;
1135 }
1136
1137 /*okay to send*/
1138 return 0;
1139 }
1140