ip_icmp.c revision 1.72 1 /* $NetBSD: ip_icmp.c,v 1.72 2003/02/26 06:31:14 matt Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.72 2003/02/26 06:31:14 matt Exp $");
109
110 #include "opt_ipsec.h"
111
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/malloc.h>
115 #include <sys/mbuf.h>
116 #include <sys/protosw.h>
117 #include <sys/socket.h>
118 #include <sys/time.h>
119 #include <sys/kernel.h>
120 #include <sys/syslog.h>
121 #include <sys/sysctl.h>
122
123 #include <net/if.h>
124 #include <net/route.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139
140 #include <machine/stdarg.h>
141
142 /*
143 * ICMP routines: error generation, receive packet processing, and
144 * routines to turnaround packets back to the originator, and
145 * host table maintenance routines.
146 */
147
/* Non-zero lets icmp_input() answer ICMP_MASKREQ; when zero such requests are not answered. */
148 int icmpmaskrepl = 0;
149 #ifdef ICMPPRINTFS
/* Non-zero turns on the debugging printf() calls guarded by ICMPPRINTFS in this file. */
150 int icmpprintfs = 0;
151 #endif
/*
 * Upper bound on how many bytes beyond the offending packet's IP header
 * icmp_error() quotes in an error message.  Settable via icmp_sysctl().
 */
152 int icmpreturndatabytes = 8;
153
154 /*
155 * List of callbacks to notify when Path MTU changes are made.
156 */
/* One list entry; allocated and filled in by icmp_mtudisc_callback_register(). */
157 struct icmp_mtudisc_callback {
/* Linkage on icmp_mtudisc_callbacks. */
158 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
/* Function called by icmp_mtudisc() with the address it was given. */
159 void (*mc_func) __P((struct in_addr));
160 };
161
/* Head of the callback list; walked by icmp_mtudisc(). */
162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
163 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
164
/* ip_next_mtu() currently has external linkage: the "static" variant is compiled out. */
165 #if 0
166 static int ip_next_mtu __P((int, int));
167 #else
168 /*static*/ int ip_next_mtu __P((int, int));
169 #endif
170
/* Limit handed to ppsratecheck() by icmp_ratelimit(); defined outside this file. */
171 extern int icmperrppslim;
/* Counter and timestamp state handed to ppsratecheck() by icmp_ratelimit(). */
172 static int icmperrpps_count = 0;
173 static struct timeval icmperrppslim_last;
/* When zero, icmp_input() drops ICMP redirects without acting on them. */
174 static int icmp_rediraccept = 1;
/*
 * Timeout given to the rt_timer queue for routes created by redirects;
 * zero means no timer queue is kept.  Units are not visible in this
 * file -- presumably seconds; confirm against rt_timer_queue_create().
 */
175 static int icmp_redirtimeout = 600;
/* Timer queue for redirect routes; NULL while no queue exists. */
176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
177
/* rt_timer callbacks registered via rt_timer_add() in this file. */
178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
180
/* Rate-limit check used by icmp_error(); returns non-zero to suppress the packet. */
181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
182
183
184 void
185 icmp_init()
186 {
187 /*
188 * This is only useful if the user initializes redirtimeout to
189 * something other than zero.
190 */
191 if (icmp_redirtimeout != 0) {
192 icmp_redirect_timeout_q =
193 rt_timer_queue_create(icmp_redirtimeout);
194 }
195 }
196
197 /*
198 * Register a Path MTU Discovery callback.
199 */
200 void
201 icmp_mtudisc_callback_register(func)
202 void (*func) __P((struct in_addr));
203 {
204 struct icmp_mtudisc_callback *mc;
205
206 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
207 mc = LIST_NEXT(mc, mc_list)) {
208 if (mc->mc_func == func)
209 return;
210 }
211
212 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
213 if (mc == NULL)
214 panic("icmp_mtudisc_callback_register");
215
216 mc->mc_func = func;
217 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
218 }
219
220 /*
221 * Generate an error packet of type error
222 * in response to bad packet ip.
223 */
224 void
225 icmp_error(n, type, code, dest, destifp)
226 struct mbuf *n;
227 int type, code;
228 n_long dest;
229 struct ifnet *destifp;
230 {
231 struct ip *oip = mtod(n, struct ip *), *nip;
232 unsigned oiplen = oip->ip_hl << 2;
233 struct icmp *icp;
234 struct mbuf *m;
235 unsigned icmplen, mblen;
236
237 #ifdef ICMPPRINTFS
238 if (icmpprintfs)
239 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
240 #endif
241 if (type != ICMP_REDIRECT)
242 icmpstat.icps_error++;
243 /*
244 * Don't send error if the original packet was encrypted.
245 * Don't send error if not the first fragment of message.
246 * Don't error if the old packet protocol was ICMP
247 * error message, only known informational types.
248 */
249 if (n->m_flags & M_DECRYPTED)
250 goto freeit;
251 if (oip->ip_off &~ htons(IP_MF|IP_DF))
252 goto freeit;
253 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
254 n->m_len >= oiplen + ICMP_MINLEN &&
255 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
256 icmpstat.icps_oldicmp++;
257 goto freeit;
258 }
259 /* Don't send error in response to a multicast or broadcast packet */
260 if (n->m_flags & (M_BCAST|M_MCAST))
261 goto freeit;
262
263 /*
264 * First, do a rate limitation check.
265 */
266 if (icmp_ratelimit(&oip->ip_src, type, code)) {
267 /* XXX stat */
268 goto freeit;
269 }
270
271 /*
272 * Now, formulate icmp message
273 */
274 icmplen = oiplen + min(icmpreturndatabytes,
275 ntohs(oip->ip_len) - oiplen);
276 /*
277 * Defend against mbuf chains shorter than oip->ip_len:
278 */
279 mblen = 0;
280 for (m = n; m && (mblen < icmplen); m = m->m_next)
281 mblen += m->m_len;
282 icmplen = min(mblen, icmplen);
283
284 /*
285 * As we are not required to return everything we have,
286 * we return whatever we can return at ease.
287 *
288 * Note that ICMP datagrams longer than 576 octets are out of spec
289 * according to RFC1812; the limit on icmpreturndatabytes below in
290 * icmp_sysctl will keep things below that limit.
291 */
292
293 KASSERT(ICMP_MINLEN <= MCLBYTES);
294
295 if (icmplen + ICMP_MINLEN > MCLBYTES)
296 icmplen = MCLBYTES - ICMP_MINLEN;
297
298 m = m_gethdr(M_DONTWAIT, MT_HEADER);
299 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
300 MCLGET(m, M_DONTWAIT);
301 if ((m->m_flags & M_EXT) == 0) {
302 m_freem(m);
303 m = NULL;
304 }
305 }
306 if (m == NULL)
307 goto freeit;
308 MCLAIM(m, n->m_owner);
309 m->m_len = icmplen + ICMP_MINLEN;
310 if ((m->m_flags & M_EXT) == 0)
311 MH_ALIGN(m, m->m_len);
312 icp = mtod(m, struct icmp *);
313 if ((u_int)type > ICMP_MAXTYPE)
314 panic("icmp_error");
315 icmpstat.icps_outhist[type]++;
316 icp->icmp_type = type;
317 if (type == ICMP_REDIRECT)
318 icp->icmp_gwaddr.s_addr = dest;
319 else {
320 icp->icmp_void = 0;
321 /*
322 * The following assignments assume an overlay with the
323 * zeroed icmp_void field.
324 */
325 if (type == ICMP_PARAMPROB) {
326 icp->icmp_pptr = code;
327 code = 0;
328 } else if (type == ICMP_UNREACH &&
329 code == ICMP_UNREACH_NEEDFRAG && destifp)
330 icp->icmp_nextmtu = htons(destifp->if_mtu);
331 }
332
333 icp->icmp_code = code;
334 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
335 nip = &icp->icmp_ip;
336
337 /*
338 * Now, copy old ip header (without options)
339 * in front of icmp message.
340 */
341 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
342 panic("icmp len");
343 m->m_data -= sizeof(struct ip);
344 m->m_len += sizeof(struct ip);
345 m->m_pkthdr.len = m->m_len;
346 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
347 nip = mtod(m, struct ip *);
348 /* ip_v set in ip_output */
349 nip->ip_hl = sizeof(struct ip) >> 2;
350 nip->ip_tos = 0;
351 nip->ip_len = htons(m->m_len);
352 /* ip_id set in ip_output */
353 nip->ip_off = htons(0);
354 /* ip_ttl set in icmp_reflect */
355 nip->ip_p = IPPROTO_ICMP;
356 nip->ip_src = oip->ip_src;
357 nip->ip_dst = oip->ip_dst;
358 icmp_reflect(m);
359
360 freeit:
361 m_freem(n);
362 }
363
/*
 * Scratch socket addresses shared by the routines in this file.
 * icmp_input() stores the embedded packet's destination in icmpsrc
 * before calling the protocol ctlinput handler or rtredirect();
 * icmp_mtudisc() uses icmpsrc as the route destination.
 * icmpdst and icmpgw are filled in by icmp_input() and icmp_reflect().
 */
364 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
365 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
366 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/*
 * Not referenced in the code visible here.  The first two members are
 * initialised to 8 and 0 -- presumably sin_len and sin_family, by
 * analogy with the initialisers above; confirm against struct sockaddr_in.
 */
367 struct sockaddr_in icmpmask = { 8, 0 };
368
369 /*
370 * Process a received ICMP message.
371 */
/*
 * m holds the received packet with its IP header at the front.  Two int
 * arguments follow in the variable argument list: hlen, the length of
 * the IP header in bytes, and proto, which is passed on to rip_input().
 *
 * Depending on the message type the packet is reflected back to the
 * sender (icmp_reflect), handed to rip_input(), or freed.
 */
372 void
373 #if __STDC__
374 icmp_input(struct mbuf *m, ...)
375 #else
376 icmp_input(m, va_alist)
377 struct mbuf *m;
378 va_dcl
379 #endif
380 {
381 int proto;
382 struct icmp *icp;
383 struct ip *ip = mtod(m, struct ip *);
384 int icmplen;
385 int i;
386 struct in_ifaddr *ia;
387 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
388 int code;
389 int hlen;
390 va_list ap;
391 struct rtentry *rt;
392
393 va_start(ap, m);
394 hlen = va_arg(ap, int);
395 proto = va_arg(ap, int);
396 va_end(ap);
397
398 /*
399 * Locate icmp structure in mbuf, and check
400 * that not corrupted and of at least minimum length.
401 */
402 icmplen = ntohs(ip->ip_len) - hlen;
403 #ifdef ICMPPRINTFS
404 if (icmpprintfs)
405 printf("icmp_input from %x to %x, len %d\n",
406 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
407 icmplen);
408 #endif
409 if (icmplen < ICMP_MINLEN) {
410 icmpstat.icps_tooshort++;
411 goto freeit;
412 }
/*
 * Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP data
 * contiguous in the first mbuf.
 */
413 i = hlen + min(icmplen, ICMP_ADVLENMIN);
414 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
415 icmpstat.icps_tooshort++;
/* m is NULL here, so there is nothing to free -- presumably m_pullup() released the chain; confirm. */
416 return;
417 }
/* m may have been replaced by m_pullup(); reload the header pointer. */
418 ip = mtod(m, struct ip *);
/* Step past the IP header so in_cksum() covers only the ICMP message ... */
419 m->m_len -= hlen;
420 m->m_data += hlen;
421 icp = mtod(m, struct icmp *);
422 /* Don't need to assert alignment, here. */
423 if (in_cksum(m, icmplen)) {
424 icmpstat.icps_checksum++;
425 goto freeit;
426 }
/* ... then expose the IP header again. */
427 m->m_len += hlen;
428 m->m_data -= hlen;
429
430 #ifdef ICMPPRINTFS
431 /*
432 * Message type specific processing.
433 */
434 if (icmpprintfs)
435 printf("icmp_input, type %d code %d\n", icp->icmp_type,
436 icp->icmp_code);
437 #endif
/* Types beyond ICMP_MAXTYPE are not counted in icps_inhist; they go straight to rip_input(). */
438 if (icp->icmp_type > ICMP_MAXTYPE)
439 goto raw;
440 icmpstat.icps_inhist[icp->icmp_type]++;
441 code = icp->icmp_code;
442 switch (icp->icmp_type) {
443
/* The error types below translate the ICMP code into a PRC_* value and jump to "deliver". */
444 case ICMP_UNREACH:
445 switch (code) {
446 case ICMP_UNREACH_NET:
447 case ICMP_UNREACH_HOST:
448 case ICMP_UNREACH_PROTOCOL:
449 case ICMP_UNREACH_PORT:
450 case ICMP_UNREACH_SRCFAIL:
/* These codes are turned into PRC_* values by adding PRC_UNREACH_NET as an offset. */
451 code += PRC_UNREACH_NET;
452 break;
453
454 case ICMP_UNREACH_NEEDFRAG:
455 code = PRC_MSGSIZE;
456 break;
457
458 case ICMP_UNREACH_NET_UNKNOWN:
459 case ICMP_UNREACH_NET_PROHIB:
460 case ICMP_UNREACH_TOSNET:
461 code = PRC_UNREACH_NET;
462 break;
463
464 case ICMP_UNREACH_HOST_UNKNOWN:
465 case ICMP_UNREACH_ISOLATED:
466 case ICMP_UNREACH_HOST_PROHIB:
467 case ICMP_UNREACH_TOSHOST:
468 code = PRC_UNREACH_HOST;
469 break;
470
471 default:
472 goto badcode;
473 }
474 goto deliver;
475
476 case ICMP_TIMXCEED:
477 if (code > 1)
478 goto badcode;
479 code += PRC_TIMXCEED_INTRANS;
480 goto deliver;
481
482 case ICMP_PARAMPROB:
483 if (code > 1)
484 goto badcode;
485 code = PRC_PARAMPROB;
486 goto deliver;
487
488 case ICMP_SOURCEQUENCH:
489 if (code)
490 goto badcode;
491 code = PRC_QUENCH;
492 goto deliver;
493
/* Reached only by goto from the error cases above; code now holds a PRC_* value. */
494 deliver:
495 /*
496 * Problem with datagram; advise higher level routines.
497 */
/* The message must be long enough to hold the quoted IP header described by icmp_ip.ip_hl. */
498 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
499 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
500 icmpstat.icps_badlen++;
501 goto freeit;
502 }
503 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
504 goto badcode;
505 #ifdef ICMPPRINTFS
506 if (icmpprintfs)
507 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
508 #endif
/* Pass the quoted packet's destination and header to the ctlinput handler of the quoted protocol, if any. */
509 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
510 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
511 if (ctlfunc)
512 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
513 &icp->icmp_ip);
/* Leaves the switch; the packet then goes to rip_input() at "raw". */
514 break;
515
/* Invalid code for the type: count it, then still hand the packet to rip_input(). */
516 badcode:
517 icmpstat.icps_badcode++;
518 break;
519
/* The request types below rewrite the message in place into the matching reply and jump to "reflect". */
520 case ICMP_ECHO:
521 icp->icmp_type = ICMP_ECHOREPLY;
522 goto reflect;
523
524 case ICMP_TSTAMP:
525 if (icmplen < ICMP_TSLEN) {
526 icmpstat.icps_badlen++;
527 break;
528 }
529 icp->icmp_type = ICMP_TSTAMPREPLY;
530 icp->icmp_rtime = iptime();
531 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
532 goto reflect;
533
534 case ICMP_MASKREQ:
/* Mask replies are only generated when the icmpmaskrepl tunable is non-zero. */
535 if (icmpmaskrepl == 0)
536 break;
537 /*
538 * We are not able to respond with all ones broadcast
539 * unless we receive it over a point-to-point interface.
540 */
541 if (icmplen < ICMP_MASKLEN) {
542 icmpstat.icps_badlen++;
543 break;
544 }
/* Look up an address on the receiving interface: by the sender's address if the packet was sent to broadcast or the null address, else by the destination. */
545 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
546 in_nullhost(ip->ip_dst))
547 icmpdst.sin_addr = ip->ip_src;
548 else
549 icmpdst.sin_addr = ip->ip_dst;
550 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
551 m->m_pkthdr.rcvif));
552 if (ia == 0)
553 break;
554 icp->icmp_type = ICMP_MASKREPLY;
555 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
/* A null source is replaced by the interface's broadcast or point-to-point peer address; icmp_reflect() uses it as the reply destination. */
556 if (in_nullhost(ip->ip_src)) {
557 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
558 ip->ip_src = ia->ia_broadaddr.sin_addr;
559 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
560 ip->ip_src = ia->ia_dstaddr.sin_addr;
561 }
/* Reached by fall-through from ICMP_MASKREQ and by goto from ICMP_ECHO / ICMP_TSTAMP. */
562 reflect:
563 icmpstat.icps_reflect++;
564 icmpstat.icps_outhist[icp->icmp_type]++;
/* m is handed to icmp_reflect(); reflected packets are not passed to rip_input(). */
565 icmp_reflect(m);
566 return;
567
568 case ICMP_REDIRECT:
569 if (code > 3)
570 goto badcode;
/* With redirects disabled the packet is freed without reaching rip_input(). */
571 if (icmp_rediraccept == 0)
572 goto freeit;
573 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
574 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
575 icmpstat.icps_badlen++;
576 break;
577 }
578 /*
579 * Short circuit routing redirects to force
580 * immediate change in the kernel's routing
581 * tables. The message is also handed to anyone
582 * listening on a raw socket (e.g. the routing
583 * daemon for use in updating its tables).
584 */
/* icmpgw = sender of the redirect, icmpdst = suggested gateway, icmpsrc = destination being redirected. */
585 icmpgw.sin_addr = ip->ip_src;
586 icmpdst.sin_addr = icp->icmp_gwaddr;
587 #ifdef ICMPPRINTFS
588 if (icmpprintfs)
589 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
590 icp->icmp_gwaddr);
591 #endif
592 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
593 rt = NULL;
594 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
595 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
596 sintosa(&icmpgw), (struct rtentry **)&rt);
/* If rtredirect() returned a route and a timeout is configured, arm a timer whose callback is icmp_redirect_timeout(). */
597 if (rt != NULL && icmp_redirtimeout != 0) {
598 i = rt_timer_add(rt, icmp_redirect_timeout,
599 icmp_redirect_timeout_q);
600 if (i)
601 log(LOG_ERR, "ICMP: redirect failed to "
602 "register timeout for route to %x, "
603 "code %d\n",
604 icp->icmp_ip.ip_dst.s_addr, i);
605 }
/* Release the route pointer returned through rt. */
606 if (rt != NULL)
607 rtfree(rt);
608
609 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
610 #ifdef IPSEC
611 key_sa_routechange((struct sockaddr *)&icmpsrc);
612 #endif
613 break;
614
615 /*
616 * No kernel processing for the following;
617 * just fall through to send to raw listener.
618 */
619 case ICMP_ECHOREPLY:
620 case ICMP_ROUTERADVERT:
621 case ICMP_ROUTERSOLICIT:
622 case ICMP_TSTAMPREPLY:
623 case ICMP_IREQREPLY:
624 case ICMP_MASKREPLY:
625 default:
626 break;
627 }
628
/* Every path that leaves the switch with "break" ends up here. */
629 raw:
630 rip_input(m, hlen, proto);
631 return;
632
/* Drop path: the packet is released and not passed on. */
633 freeit:
634 m_freem(m);
635 return;
636 }
637
638 /*
639 * Reflect the ip packet back to the source
640 */
/*
 * m holds an IP packet whose header still names the original sender as
 * source.  The routine makes that sender the destination, picks a local
 * source address, rebuilds the IP options for the reply, strips the
 * original options from m and passes the result to icmp_send().
 * m is freed here when no usable return or source address exists.
 */
641 void
642 icmp_reflect(m)
643 struct mbuf *m;
644 {
645 struct ip *ip = mtod(m, struct ip *);
646 struct in_ifaddr *ia;
647 struct ifaddr *ifa;
648 struct sockaddr_in *sin = 0;
649 struct in_addr t;
650 struct mbuf *opts = 0;
/* Number of IP option bytes in the incoming header. */
651 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
652
/* Refuse to reply to a source that in_canforward() rejects, unless it lies in the loopback network. */
653 if (!in_canforward(ip->ip_src) &&
654 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
655 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
656 m_freem(m); /* Bad return address */
657 goto done; /* ip_output() will check for broadcast */
658 }
/* t keeps the original destination; the original source becomes the reply destination. */
659 t = ip->ip_dst;
660 ip->ip_dst = ip->ip_src;
661 /*
662 * If the incoming packet was addressed directly to us, use
663 * dst as the src for the reply. Otherwise (broadcast or
664 * anonymous), use an address which corresponds to the
665 * incoming interface, with a preference for the address which
666 * corresponds to the route to the destination of the ICMP.
667 */
668
669 /* Look for packet addressed to us */
670 INADDR_TO_IA(t, ia);
671
672 /* look for packet sent to broadcast address */
/* NOTE(review): m->m_pkthdr.rcvif is dereferenced without a NULL check here and below -- confirm callers always set it. */
673 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
674 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
675 if (ifa->ifa_addr->sa_family != AF_INET)
676 continue;
677 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
678 ia = ifatoia(ifa);
679 break;
680 }
681 }
682 }
683
684 if (ia)
685 sin = &ia->ia_addr;
686
687 icmpdst.sin_addr = t;
688
689 /*
690 * if the packet is addressed somewhere else, compute the
691 * source address for packets routed back to the source, and
692 * use that, if it's an address on the interface which
693 * received the packet
694 */
695 if (sin == (struct sockaddr_in *)0) {
696 struct sockaddr_in sin_dst;
697 struct route icmproute;
698 int errornum;
699
700 sin_dst.sin_family = AF_INET;
701 sin_dst.sin_len = sizeof(struct sockaddr_in);
702 sin_dst.sin_addr = ip->ip_dst;
703 bzero(&icmproute, sizeof(icmproute));
704 errornum = 0;
705 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
706 /* errornum is never used */
/* Drop the route that in_selectsrc() may have stored in the temporary struct route. */
707 if (icmproute.ro_rt)
708 RTFREE(icmproute.ro_rt);
709 /* check to make sure sin is a source address on rcvif */
710 if (sin) {
711 t = sin->sin_addr;
712 sin = (struct sockaddr_in *)0;
713 INADDR_TO_IA(t, ia);
714 while (ia) {
715 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
716 sin = &ia->ia_addr;
717 break;
718 }
719 NEXT_IA_WITH_SAME_ADDR(ia);
720 }
721 }
722 }
723
724 /*
725 * if it was not addressed to us, but the route doesn't go out
726 * the source interface, pick an address on the source
727 * interface. This can happen when routing is asymmetric, or
728 * when the incoming packet was encapsulated
729 */
730 if (sin == (struct sockaddr_in *)0) {
731 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
732 if (ifa->ifa_addr->sa_family != AF_INET)
733 continue;
734 sin = &(ifatoia(ifa)->ia_addr);
735 break;
736 }
737 }
738
739 /*
740 * The following happens if the packet was not addressed to us,
741 * and was received on an interface with no IP address:
742 * We find the first AF_INET address on the first non-loopback
743 * interface.
744 */
745 if (sin == (struct sockaddr_in *)0)
746 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
747 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
748 continue;
749 sin = &ia->ia_addr;
750 break;
751 }
752
753 /*
754 * If we still didn't find an address, punt. We could have an
755 * interface up (and receiving packets) with no address.
756 */
757 if (sin == (struct sockaddr_in *)0) {
758 m_freem(m);
759 goto done;
760 }
761
762 ip->ip_src = sin->sin_addr;
763 ip->ip_ttl = MAXTTL;
764
765 if (optlen > 0) {
766 u_char *cp;
767 int opt, cnt;
768 u_int len;
769
770 /*
771 * Retrieve any source routing from the incoming packet;
772 * add on any record-route or timestamp options.
773 */
774 cp = (u_char *) (ip + 1);
/* Without a saved source route, start a fresh option mbuf whose first sizeof(struct in_addr) bytes hold zeroin_addr. */
775 if ((opts = ip_srcroute()) == 0 &&
776 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
777 MCLAIM(opts, m->m_owner);
778 opts->m_len = sizeof(struct in_addr);
779 *mtod(opts, struct in_addr *) = zeroin_addr;
780 }
781 if (opts) {
782 #ifdef ICMPPRINTFS
783 if (icmpprintfs)
784 printf("icmp_reflect optlen %d rt %d => ",
785 optlen, opts->m_len);
786 #endif
/* Walk the incoming options; the walk stops at EOL or at an option whose length byte is missing, too small, or larger than what remains. */
787 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
788 opt = cp[IPOPT_OPTVAL];
789 if (opt == IPOPT_EOL)
790 break;
791 if (opt == IPOPT_NOP)
792 len = 1;
793 else {
794 if (cnt < IPOPT_OLEN + sizeof(*cp))
795 break;
796 len = cp[IPOPT_OLEN];
797 if (len < IPOPT_OLEN + sizeof(*cp) ||
798 len > cnt)
799 break;
800 }
801 /*
802 * Should check for overflow, but it "can't happen"
803 */
/* Only record-route, timestamp and security options are copied into the reply. */
804 if (opt == IPOPT_RR || opt == IPOPT_TS ||
805 opt == IPOPT_SECURITY) {
806 bcopy((caddr_t)cp,
807 mtod(opts, caddr_t) + opts->m_len, len);
808 opts->m_len += len;
809 }
810 }
811 /* Terminate & pad, if necessary */
812 if ((cnt = opts->m_len % 4) != 0) {
813 for (; cnt < 4; cnt++) {
814 *(mtod(opts, caddr_t) + opts->m_len) =
815 IPOPT_EOL;
816 opts->m_len++;
817 }
818 }
819 #ifdef ICMPPRINTFS
820 if (icmpprintfs)
821 printf("%d\n", opts->m_len);
822 #endif
823 }
824 /*
825 * Now strip out original options by copying rest of first
826 * mbuf's data back, and adjust the IP length.
827 */
828 ip->ip_len = htons(ntohs(ip->ip_len) - optlen);
829 ip->ip_hl = sizeof(struct ip) >> 2;
830 m->m_len -= optlen;
831 if (m->m_flags & M_PKTHDR)
832 m->m_pkthdr.len -= optlen;
/* optlen now becomes the old full header length, i.e. the offset of the data to move up. */
833 optlen += sizeof(struct ip);
834 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
835 (unsigned)(m->m_len - sizeof(struct ip)));
836 }
/* The reply must not carry the broadcast/multicast flags of the packet it answers. */
837 m->m_flags &= ~(M_BCAST|M_MCAST);
838 icmp_send(m, opts);
/* opts, if any, is released here on every path; m has been passed to icmp_send() or freed above. */
839 done:
840 if (opts)
841 (void)m_free(opts);
842 }
843
844 /*
845 * Send an icmp packet back to the ip level,
846 * after supplying a checksum.
847 */
848 void
849 icmp_send(m, opts)
850 struct mbuf *m;
851 struct mbuf *opts;
852 {
853 struct ip *ip = mtod(m, struct ip *);
854 int hlen;
855 struct icmp *icp;
856
857 hlen = ip->ip_hl << 2;
858 m->m_data += hlen;
859 m->m_len -= hlen;
860 icp = mtod(m, struct icmp *);
861 icp->icmp_cksum = 0;
862 icp->icmp_cksum = in_cksum(m, ntohs(ip->ip_len) - hlen);
863 m->m_data -= hlen;
864 m->m_len += hlen;
865 #ifdef ICMPPRINTFS
866 if (icmpprintfs)
867 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
868 #endif
869 #ifdef IPSEC
870 /* Don't lookup socket */
871 (void)ipsec_setsocket(m, NULL);
872 #endif
873 (void) ip_output(m, opts, NULL, 0, NULL);
874 }
875
876 n_time
877 iptime()
878 {
879 struct timeval atv;
880 u_long t;
881
882 microtime(&atv);
883 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
884 return (htonl(t));
885 }
886
887 int
888 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
889 int *name;
890 u_int namelen;
891 void *oldp;
892 size_t *oldlenp;
893 void *newp;
894 size_t newlen;
895 {
896 int arg, error;
897
898 /* All sysctl names at this level are terminal. */
899 if (namelen != 1)
900 return (ENOTDIR);
901
902 switch (name[0])
903 {
904 case ICMPCTL_MASKREPL:
905 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
906 break;
907 case ICMPCTL_RETURNDATABYTES:
908 arg = icmpreturndatabytes;
909 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
910 if (error)
911 break;
912 if ((arg >= 8) || (arg <= 512))
913 icmpreturndatabytes = arg;
914 else
915 error = EINVAL;
916 break;
917 case ICMPCTL_ERRPPSLIMIT:
918 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
919 break;
920 case ICMPCTL_REDIRACCEPT:
921 error = sysctl_int(oldp, oldlenp, newp, newlen,
922 &icmp_rediraccept);
923 break;
924 case ICMPCTL_REDIRTIMEOUT:
925 error = sysctl_int(oldp, oldlenp, newp, newlen,
926 &icmp_redirtimeout);
927 if (icmp_redirect_timeout_q != NULL) {
928 if (icmp_redirtimeout == 0) {
929 rt_timer_queue_destroy(icmp_redirect_timeout_q,
930 TRUE);
931 icmp_redirect_timeout_q = NULL;
932 } else {
933 rt_timer_queue_change(icmp_redirect_timeout_q,
934 icmp_redirtimeout);
935 }
936 } else if (icmp_redirtimeout > 0) {
937 icmp_redirect_timeout_q =
938 rt_timer_queue_create(icmp_redirtimeout);
939 }
940 return (error);
941 default:
942 error = ENOPROTOOPT;
943 break;
944 }
945 return error;
946 }
947
948 /* Table of common MTUs: */
949
/*
 * Sorted in descending order and terminated by 0.  icmp_mtudisc() and
 * ip_next_mtu() scan it from the front, and ip_next_mtu() relies on the
 * 0 entry to stop its scan.
 */
950 static const u_int mtu_table[] = {
951 65535, 65280, 32000, 17914, 9180, 8166,
952 4352, 2002, 1492, 1006, 508, 296, 68, 0
953 };
954
955 void
956 icmp_mtudisc(icp, faddr)
957 struct icmp *icp;
958 struct in_addr faddr;
959 {
960 struct icmp_mtudisc_callback *mc;
961 struct sockaddr *dst = sintosa(&icmpsrc);
962 struct rtentry *rt;
963 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
964 int error;
965
966 rt = rtalloc1(dst, 1);
967 if (rt == 0)
968 return;
969
970 /* If we didn't get a host route, allocate one */
971
972 if ((rt->rt_flags & RTF_HOST) == 0) {
973 struct rtentry *nrt;
974
975 error = rtrequest((int) RTM_ADD, dst,
976 (struct sockaddr *) rt->rt_gateway,
977 (struct sockaddr *) 0,
978 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
979 if (error) {
980 rtfree(rt);
981 return;
982 }
983 nrt->rt_rmx = rt->rt_rmx;
984 rtfree(rt);
985 rt = nrt;
986 }
987 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
988 if (error) {
989 rtfree(rt);
990 return;
991 }
992
993 if (mtu == 0) {
994 int i = 0;
995
996 mtu = ntohs(icp->icmp_ip.ip_len);
997 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
998 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
999 mtu -= (icp->icmp_ip.ip_hl << 2);
1000
1001 /* If we still can't guess a value, try the route */
1002
1003 if (mtu == 0) {
1004 mtu = rt->rt_rmx.rmx_mtu;
1005
1006 /* If no route mtu, default to the interface mtu */
1007
1008 if (mtu == 0)
1009 mtu = rt->rt_ifp->if_mtu;
1010 }
1011
1012 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1013 if (mtu > mtu_table[i]) {
1014 mtu = mtu_table[i];
1015 break;
1016 }
1017 }
1018
1019 /*
1020 * XXX: RTV_MTU is overloaded, since the admin can set it
1021 * to turn off PMTU for a route, and the kernel can
1022 * set it to indicate a serious problem with PMTU
1023 * on a route. We should be using a separate flag
1024 * for the kernel to indicate this.
1025 */
1026
1027 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1028 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1029 rt->rt_rmx.rmx_locks |= RTV_MTU;
1030 else if (rt->rt_rmx.rmx_mtu > mtu ||
1031 rt->rt_rmx.rmx_mtu == 0) {
1032 icmpstat.icps_pmtuchg++;
1033 rt->rt_rmx.rmx_mtu = mtu;
1034 }
1035 }
1036
1037 if (rt)
1038 rtfree(rt);
1039
1040 /*
1041 * Notify protocols that the MTU for this destination
1042 * has changed.
1043 */
1044 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1045 mc = LIST_NEXT(mc, mc_list))
1046 (*mc->mc_func)(faddr);
1047 }
1048
1049 /*
1050 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1051 * given current value MTU. If DIR is less than zero, a larger plateau
1052 * is returned; otherwise, a smaller value is returned.
1053 */
1054 int
1055 ip_next_mtu(mtu, dir) /* XXX */
1056 int mtu;
1057 int dir;
1058 {
1059 int i;
1060
1061 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1062 if (mtu >= mtu_table[i])
1063 break;
1064 }
1065
1066 if (dir < 0) {
1067 if (i == 0) {
1068 return 0;
1069 } else {
1070 return mtu_table[i - 1];
1071 }
1072 } else {
1073 if (mtu_table[i] == 0) {
1074 return 0;
1075 } else if (mtu > mtu_table[i]) {
1076 return mtu_table[i];
1077 } else {
1078 return mtu_table[i + 1];
1079 }
1080 }
1081 }
1082
1083 static void
1084 icmp_mtudisc_timeout(rt, r)
1085 struct rtentry *rt;
1086 struct rttimer *r;
1087 {
1088 if (rt == NULL)
1089 panic("icmp_mtudisc_timeout: bad route to timeout");
1090 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1091 (RTF_DYNAMIC | RTF_HOST)) {
1092 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1093 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1094 } else {
1095 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1096 rt->rt_rmx.rmx_mtu = 0;
1097 }
1098 }
1099 }
1100
1101 static void
1102 icmp_redirect_timeout(rt, r)
1103 struct rtentry *rt;
1104 struct rttimer *r;
1105 {
1106 if (rt == NULL)
1107 panic("icmp_redirect_timeout: bad route to timeout");
1108 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1109 (RTF_DYNAMIC | RTF_HOST)) {
1110 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1111 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1112 }
1113 }
1114
1115 /*
1116 * Perform rate limit check.
1117 * Returns 0 if it is okay to send the icmp packet.
1118 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1119 * limitation.
1120 *
1121 * XXX per-destination/type check necessary?
1122 */
1123 static int
1124 icmp_ratelimit(dst, type, code)
1125 const struct in_addr *dst;
1126 const int type; /* not used at this moment */
1127 const int code; /* not used at this moment */
1128 {
1129
1130 /* PPS limit */
1131 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1132 icmperrppslim)) {
1133 /* The packet is subject to rate limit */
1134 return 1;
1135 }
1136
1137 /*okay to send*/
1138 return 0;
1139 }
1140