ip_icmp.c revision 1.67 1 /* $NetBSD: ip_icmp.c,v 1.67 2002/06/09 16:33:40 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.67 2002/06/09 16:33:40 itojun Exp $");
109
110 #include "opt_ipsec.h"
111
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/malloc.h>
115 #include <sys/mbuf.h>
116 #include <sys/protosw.h>
117 #include <sys/socket.h>
118 #include <sys/time.h>
119 #include <sys/kernel.h>
120 #include <sys/syslog.h>
121 #include <sys/sysctl.h>
122
123 #include <net/if.h>
124 #include <net/route.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139
140 #include <machine/stdarg.h>
141
142 /*
143 * ICMP routines: error generation, receive packet processing, and
144 * routines to turnaround packets back to the originator, and
145 * host table maintenance routines.
146 */
147
/*
 * Run-time knobs.  icmpmaskrepl and icmpreturndatabytes can be changed
 * through icmp_sysctl().
 */
/* Non-zero: icmp_input() answers ICMP address mask requests. */
int icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
/* Non-zero: the ICMP routines emit debugging printf()s. */
int icmpprintfs = 0;
#endif
/*
 * Upper bound on the number of bytes following the offending datagram's
 * IP header that icmp_error() quotes in the error message.
 */
int icmpreturndatabytes = 8;
153
154 /*
155 * List of callbacks to notify when Path MTU changes are made.
156 */
/*
 * One registered callback: list linkage plus the function that
 * icmp_mtudisc() calls with the affected foreign address.
 */
struct icmp_mtudisc_callback {
	LIST_ENTRY(icmp_mtudisc_callback) mc_list;	/* linkage on icmp_mtudisc_callbacks */
	void (*mc_func) __P((struct in_addr));		/* function to invoke */
};
161
/* Head of the callback list; filled by icmp_mtudisc_callback_register(). */
LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);

/*
 * ip_next_mtu() is currently declared with external linkage; the
 * static variant is compiled out.
 */
#if 0
static int ip_next_mtu __P((int, int));
#else
/*static*/ int ip_next_mtu __P((int, int));
#endif

/* Rate limit handed to ppsratecheck() by icmp_ratelimit(); defined elsewhere. */
extern int icmperrppslim;
/* Counter and timestamp state that ppsratecheck() maintains for us. */
static int icmperrpps_count = 0;
static struct timeval icmperrppslim_last;
/* Zero: icmp_input() discards ICMP redirects instead of acting on them. */
static int icmp_rediraccept = 1;
/* Non-zero: routes installed by a redirect get a timer on the queue below. */
static int icmp_redirtimeout = 0;
/* Timer queue for redirect routes; created in icmp_init() or icmp_sysctl(). */
static struct rttimer_queue *icmp_redirect_timeout_q = NULL;

/* Route timer expiry handlers. */
static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));

/* Returns non-zero when an ICMP error must be suppressed. */
static int icmp_ratelimit __P((const struct in_addr *, const int, const int));

183
184 void
185 icmp_init()
186 {
187 /*
188 * This is only useful if the user initializes redirtimeout to
189 * something other than zero.
190 */
191 if (icmp_redirtimeout != 0) {
192 icmp_redirect_timeout_q =
193 rt_timer_queue_create(icmp_redirtimeout);
194 }
195 }
196
197 /*
198 * Register a Path MTU Discovery callback.
199 */
200 void
201 icmp_mtudisc_callback_register(func)
202 void (*func) __P((struct in_addr));
203 {
204 struct icmp_mtudisc_callback *mc;
205
206 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
207 mc = LIST_NEXT(mc, mc_list)) {
208 if (mc->mc_func == func)
209 return;
210 }
211
212 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
213 if (mc == NULL)
214 panic("icmp_mtudisc_callback_register");
215
216 mc->mc_func = func;
217 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
218 }
219
220 /*
221 * Generate an error packet of type error
222 * in response to bad packet ip.
223 */
224 void
225 icmp_error(n, type, code, dest, destifp)
226 struct mbuf *n;
227 int type, code;
228 n_long dest;
229 struct ifnet *destifp;
230 {
231 struct ip *oip = mtod(n, struct ip *), *nip;
232 unsigned oiplen = oip->ip_hl << 2;
233 struct icmp *icp;
234 struct mbuf *m;
235 unsigned icmplen, mblen;
236
237 #ifdef ICMPPRINTFS
238 if (icmpprintfs)
239 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
240 #endif
241 if (type != ICMP_REDIRECT)
242 icmpstat.icps_error++;
243 /*
244 * Don't send error if the original packet was encrypted.
245 * Don't send error if not the first fragment of message.
246 * Don't error if the old packet protocol was ICMP
247 * error message, only known informational types.
248 */
249 if (n->m_flags & M_DECRYPTED)
250 goto freeit;
251 if (oip->ip_off &~ (IP_MF|IP_DF))
252 goto freeit;
253 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
254 n->m_len >= oiplen + ICMP_MINLEN &&
255 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
256 icmpstat.icps_oldicmp++;
257 goto freeit;
258 }
259 /* Don't send error in response to a multicast or broadcast packet */
260 if (n->m_flags & (M_BCAST|M_MCAST))
261 goto freeit;
262
263 /*
264 * First, do a rate limitation check.
265 */
266 if (icmp_ratelimit(&oip->ip_src, type, code)) {
267 /* XXX stat */
268 goto freeit;
269 }
270
271 /*
272 * Now, formulate icmp message
273 */
274 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
275 /*
276 * Defend against mbuf chains shorter than oip->ip_len:
277 */
278 mblen = 0;
279 for (m = n; m && (mblen < icmplen); m = m->m_next)
280 mblen += m->m_len;
281 icmplen = min(mblen, icmplen);
282
283 /*
284 * As we are not required to return everything we have,
285 * we return whatever we can return at ease.
286 *
287 * Note that ICMP datagrams longer than 576 octets are out of spec
288 * according to RFC1812; the limit on icmpreturndatabytes below in
289 * icmp_sysctl will keep things below that limit.
290 */
291
292 KASSERT(ICMP_MINLEN <= MCLBYTES);
293
294 if (icmplen + ICMP_MINLEN > MCLBYTES)
295 icmplen = MCLBYTES - ICMP_MINLEN;
296
297 m = m_gethdr(M_DONTWAIT, MT_HEADER);
298 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
299 MCLGET(m, M_DONTWAIT);
300 if ((m->m_flags & M_EXT) == 0) {
301 m_freem(m);
302 m = NULL;
303 }
304 }
305 if (m == NULL)
306 goto freeit;
307 m->m_len = icmplen + ICMP_MINLEN;
308 if ((m->m_flags & M_EXT) == 0)
309 MH_ALIGN(m, m->m_len);
310 icp = mtod(m, struct icmp *);
311 if ((u_int)type > ICMP_MAXTYPE)
312 panic("icmp_error");
313 icmpstat.icps_outhist[type]++;
314 icp->icmp_type = type;
315 if (type == ICMP_REDIRECT)
316 icp->icmp_gwaddr.s_addr = dest;
317 else {
318 icp->icmp_void = 0;
319 /*
320 * The following assignments assume an overlay with the
321 * zeroed icmp_void field.
322 */
323 if (type == ICMP_PARAMPROB) {
324 icp->icmp_pptr = code;
325 code = 0;
326 } else if (type == ICMP_UNREACH &&
327 code == ICMP_UNREACH_NEEDFRAG && destifp)
328 icp->icmp_nextmtu = htons(destifp->if_mtu);
329 }
330
331 HTONS(oip->ip_off);
332 HTONS(oip->ip_len);
333 icp->icmp_code = code;
334 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
335 nip = &icp->icmp_ip;
336
337 /*
338 * Now, copy old ip header (without options)
339 * in front of icmp message.
340 */
341 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
342 panic("icmp len");
343 m->m_data -= sizeof(struct ip);
344 m->m_len += sizeof(struct ip);
345 m->m_pkthdr.len = m->m_len;
346 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
347 nip = mtod(m, struct ip *);
348 /* ip_v set in ip_output */
349 nip->ip_hl = sizeof(struct ip) >> 2;
350 nip->ip_tos = 0;
351 nip->ip_len = m->m_len;
352 /* ip_id set in ip_output */
353 nip->ip_off = 0;
354 /* ip_ttl set in icmp_reflect */
355 nip->ip_p = IPPROTO_ICMP;
356 nip->ip_src = oip->ip_src;
357 nip->ip_dst = oip->ip_dst;
358 icmp_reflect(m);
359
360 freeit:
361 m_freem(n);
362 }
363
/*
 * Scratch AF_INET socket addresses.  Their sin_addr fields are
 * overwritten by icmp_input() and icmp_reflect() before each use;
 * icmp_mtudisc() reads icmpsrc as the destination to look up.
 */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/* Not referenced in the visible part of this file. */
struct sockaddr_in icmpmask = { 8, 0 };
368
369 /*
370 * Process a received ICMP message.
371 */
372 void
373 #if __STDC__
374 icmp_input(struct mbuf *m, ...)
375 #else
376 icmp_input(m, va_alist)
377 struct mbuf *m;
378 va_dcl
379 #endif
380 {
381 int proto;
382 struct icmp *icp;
383 struct ip *ip = mtod(m, struct ip *);
384 int icmplen;
385 int i;
386 struct in_ifaddr *ia;
387 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
388 int code;
389 int hlen;
390 va_list ap;
391 struct rtentry *rt;
392
393 va_start(ap, m);
394 hlen = va_arg(ap, int);
395 proto = va_arg(ap, int);
396 va_end(ap);
397
398 /*
399 * Locate icmp structure in mbuf, and check
400 * that not corrupted and of at least minimum length.
401 */
402 icmplen = ip->ip_len - hlen;
403 #ifdef ICMPPRINTFS
404 if (icmpprintfs)
405 printf("icmp_input from %x to %x, len %d\n",
406 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
407 icmplen);
408 #endif
409 if (icmplen < ICMP_MINLEN) {
410 icmpstat.icps_tooshort++;
411 goto freeit;
412 }
413 i = hlen + min(icmplen, ICMP_ADVLENMIN);
414 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
415 icmpstat.icps_tooshort++;
416 return;
417 }
418 ip = mtod(m, struct ip *);
419 m->m_len -= hlen;
420 m->m_data += hlen;
421 icp = mtod(m, struct icmp *);
422 if (in_cksum(m, icmplen)) {
423 icmpstat.icps_checksum++;
424 goto freeit;
425 }
426 m->m_len += hlen;
427 m->m_data -= hlen;
428
429 #ifdef ICMPPRINTFS
430 /*
431 * Message type specific processing.
432 */
433 if (icmpprintfs)
434 printf("icmp_input, type %d code %d\n", icp->icmp_type,
435 icp->icmp_code);
436 #endif
437 if (icp->icmp_type > ICMP_MAXTYPE)
438 goto raw;
439 icmpstat.icps_inhist[icp->icmp_type]++;
440 code = icp->icmp_code;
441 switch (icp->icmp_type) {
442
443 case ICMP_UNREACH:
444 switch (code) {
445 case ICMP_UNREACH_NET:
446 case ICMP_UNREACH_HOST:
447 case ICMP_UNREACH_PROTOCOL:
448 case ICMP_UNREACH_PORT:
449 case ICMP_UNREACH_SRCFAIL:
450 code += PRC_UNREACH_NET;
451 break;
452
453 case ICMP_UNREACH_NEEDFRAG:
454 code = PRC_MSGSIZE;
455 break;
456
457 case ICMP_UNREACH_NET_UNKNOWN:
458 case ICMP_UNREACH_NET_PROHIB:
459 case ICMP_UNREACH_TOSNET:
460 code = PRC_UNREACH_NET;
461 break;
462
463 case ICMP_UNREACH_HOST_UNKNOWN:
464 case ICMP_UNREACH_ISOLATED:
465 case ICMP_UNREACH_HOST_PROHIB:
466 case ICMP_UNREACH_TOSHOST:
467 code = PRC_UNREACH_HOST;
468 break;
469
470 default:
471 goto badcode;
472 }
473 goto deliver;
474
475 case ICMP_TIMXCEED:
476 if (code > 1)
477 goto badcode;
478 code += PRC_TIMXCEED_INTRANS;
479 goto deliver;
480
481 case ICMP_PARAMPROB:
482 if (code > 1)
483 goto badcode;
484 code = PRC_PARAMPROB;
485 goto deliver;
486
487 case ICMP_SOURCEQUENCH:
488 if (code)
489 goto badcode;
490 code = PRC_QUENCH;
491 goto deliver;
492
493 deliver:
494 /*
495 * Problem with datagram; advise higher level routines.
496 */
497 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
498 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
499 icmpstat.icps_badlen++;
500 goto freeit;
501 }
502 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
503 goto badcode;
504 NTOHS(icp->icmp_ip.ip_len);
505 #ifdef ICMPPRINTFS
506 if (icmpprintfs)
507 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
508 #endif
509 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
510 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
511 if (ctlfunc)
512 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
513 &icp->icmp_ip);
514 break;
515
516 badcode:
517 icmpstat.icps_badcode++;
518 break;
519
520 case ICMP_ECHO:
521 icp->icmp_type = ICMP_ECHOREPLY;
522 goto reflect;
523
524 case ICMP_TSTAMP:
525 if (icmplen < ICMP_TSLEN) {
526 icmpstat.icps_badlen++;
527 break;
528 }
529 icp->icmp_type = ICMP_TSTAMPREPLY;
530 icp->icmp_rtime = iptime();
531 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
532 goto reflect;
533
534 case ICMP_MASKREQ:
535 if (icmpmaskrepl == 0)
536 break;
537 /*
538 * We are not able to respond with all ones broadcast
539 * unless we receive it over a point-to-point interface.
540 */
541 if (icmplen < ICMP_MASKLEN) {
542 icmpstat.icps_badlen++;
543 break;
544 }
545 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
546 in_nullhost(ip->ip_dst))
547 icmpdst.sin_addr = ip->ip_src;
548 else
549 icmpdst.sin_addr = ip->ip_dst;
550 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
551 m->m_pkthdr.rcvif));
552 if (ia == 0)
553 break;
554 icp->icmp_type = ICMP_MASKREPLY;
555 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
556 if (in_nullhost(ip->ip_src)) {
557 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
558 ip->ip_src = ia->ia_broadaddr.sin_addr;
559 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
560 ip->ip_src = ia->ia_dstaddr.sin_addr;
561 }
562 reflect:
563 icmpstat.icps_reflect++;
564 icmpstat.icps_outhist[icp->icmp_type]++;
565 icmp_reflect(m);
566 return;
567
568 case ICMP_REDIRECT:
569 if (code > 3)
570 goto badcode;
571 if (icmp_rediraccept == 0)
572 goto freeit;
573 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
574 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
575 icmpstat.icps_badlen++;
576 break;
577 }
578 /*
579 * Short circuit routing redirects to force
580 * immediate change in the kernel's routing
581 * tables. The message is also handed to anyone
582 * listening on a raw socket (e.g. the routing
583 * daemon for use in updating its tables).
584 */
585 icmpgw.sin_addr = ip->ip_src;
586 icmpdst.sin_addr = icp->icmp_gwaddr;
587 #ifdef ICMPPRINTFS
588 if (icmpprintfs)
589 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
590 icp->icmp_gwaddr);
591 #endif
592 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
593 rt = NULL;
594 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
595 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
596 sintosa(&icmpgw), (struct rtentry **)&rt);
597 if (rt != NULL && icmp_redirtimeout != 0) {
598 i = rt_timer_add(rt, icmp_redirect_timeout,
599 icmp_redirect_timeout_q);
600 if (i)
601 log(LOG_ERR, "ICMP: redirect failed to "
602 "register timeout for route to %x, "
603 "code %d\n",
604 icp->icmp_ip.ip_dst.s_addr, i);
605 }
606 if (rt != NULL)
607 rtfree(rt);
608
609 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
610 #ifdef IPSEC
611 key_sa_routechange((struct sockaddr *)&icmpsrc);
612 #endif
613 break;
614
615 /*
616 * No kernel processing for the following;
617 * just fall through to send to raw listener.
618 */
619 case ICMP_ECHOREPLY:
620 case ICMP_ROUTERADVERT:
621 case ICMP_ROUTERSOLICIT:
622 case ICMP_TSTAMPREPLY:
623 case ICMP_IREQREPLY:
624 case ICMP_MASKREPLY:
625 default:
626 break;
627 }
628
629 raw:
630 rip_input(m, hlen, proto);
631 return;
632
633 freeit:
634 m_freem(m);
635 return;
636 }
637
638 /*
639 * Reflect the ip packet back to the source
640 */
641 void
642 icmp_reflect(m)
643 struct mbuf *m;
644 {
645 struct ip *ip = mtod(m, struct ip *);
646 struct in_ifaddr *ia;
647 struct ifaddr *ifa;
648 struct sockaddr_in *sin = 0;
649 struct in_addr t;
650 struct mbuf *opts = 0;
651 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
652
653 if (!in_canforward(ip->ip_src) &&
654 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
655 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
656 m_freem(m); /* Bad return address */
657 goto done; /* ip_output() will check for broadcast */
658 }
659 t = ip->ip_dst;
660 ip->ip_dst = ip->ip_src;
661 /*
662 * If the incoming packet was addressed directly to us, use
663 * dst as the src for the reply. Otherwise (broadcast or
664 * anonymous), use an address which corresponds to the
665 * incoming interface, with a preference for the address which
666 * corresponds to the route to the destination of the ICMP.
667 */
668
669 /* Look for packet addressed to us */
670 INADDR_TO_IA(t, ia);
671
672 /* look for packet sent to broadcast address */
673 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
674 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
675 if (ifa->ifa_addr->sa_family != AF_INET)
676 continue;
677 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
678 ia = ifatoia(ifa);
679 break;
680 }
681 }
682 }
683
684 if (ia)
685 sin = &ia->ia_addr;
686
687 icmpdst.sin_addr = t;
688
689 /* if the packet is addressed somewhere else, compute the
690 source address for packets routed back to the source, and
691 use that, if it's an address on the interface which
692 received the packet */
693 if (sin == (struct sockaddr_in *)0) {
694 struct sockaddr_in sin_dst;
695 struct route icmproute;
696 int errornum;
697
698 sin_dst.sin_family = AF_INET;
699 sin_dst.sin_len = sizeof(struct sockaddr_in);
700 sin_dst.sin_addr = ip->ip_dst;
701 bzero(&icmproute, sizeof(icmproute));
702 errornum = 0;
703 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
704 /* errornum is never used */
705 if (icmproute.ro_rt)
706 RTFREE(icmproute.ro_rt);
707 /* check to make sure sin is a source address on rcvif */
708 if (sin) {
709 t = sin->sin_addr;
710 sin = (struct sockaddr_in *)0;
711 INADDR_TO_IA(t, ia);
712 while (ia) {
713 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
714 sin = &ia->ia_addr;
715 break;
716 }
717 NEXT_IA_WITH_SAME_ADDR(ia);
718 }
719 }
720 }
721
722 /* if it was not addressed to us, but the route doesn't go out
723 the source interface, pick an address on the source
724 interface. This can happen when routing is asymmetric, or
725 when the incoming packet was encapsulated */
726 if (sin == (struct sockaddr_in *)0) {
727 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
728 if (ifa->ifa_addr->sa_family != AF_INET)
729 continue;
730 sin = &(ifatoia(ifa)->ia_addr);
731 break;
732 }
733 }
734
735 /*
736 * The following happens if the packet was not addressed to us,
737 * and was received on an interface with no IP address:
738 * We find the first AF_INET address on the first non-loopback
739 * interface.
740 */
741 if (sin == (struct sockaddr_in *)0)
742 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
743 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
744 continue;
745 sin = &ia->ia_addr;
746 break;
747 }
748
749 /*
750 * If we still didn't find an address, punt. We could have an
751 * interface up (and receiving packets) with no address.
752 */
753 if (sin == (struct sockaddr_in *)0) {
754 m_freem(m);
755 goto done;
756 }
757
758 ip->ip_src = sin->sin_addr;
759 ip->ip_ttl = MAXTTL;
760
761 if (optlen > 0) {
762 u_char *cp;
763 int opt, cnt;
764 u_int len;
765
766 /*
767 * Retrieve any source routing from the incoming packet;
768 * add on any record-route or timestamp options.
769 */
770 cp = (u_char *) (ip + 1);
771 if ((opts = ip_srcroute()) == 0 &&
772 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
773 opts->m_len = sizeof(struct in_addr);
774 *mtod(opts, struct in_addr *) = zeroin_addr;
775 }
776 if (opts) {
777 #ifdef ICMPPRINTFS
778 if (icmpprintfs)
779 printf("icmp_reflect optlen %d rt %d => ",
780 optlen, opts->m_len);
781 #endif
782 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
783 opt = cp[IPOPT_OPTVAL];
784 if (opt == IPOPT_EOL)
785 break;
786 if (opt == IPOPT_NOP)
787 len = 1;
788 else {
789 if (cnt < IPOPT_OLEN + sizeof(*cp))
790 break;
791 len = cp[IPOPT_OLEN];
792 if (len < IPOPT_OLEN + sizeof(*cp) ||
793 len > cnt)
794 break;
795 }
796 /*
797 * Should check for overflow, but it "can't happen"
798 */
799 if (opt == IPOPT_RR || opt == IPOPT_TS ||
800 opt == IPOPT_SECURITY) {
801 bcopy((caddr_t)cp,
802 mtod(opts, caddr_t) + opts->m_len, len);
803 opts->m_len += len;
804 }
805 }
806 /* Terminate & pad, if necessary */
807 if ((cnt = opts->m_len % 4) != 0) {
808 for (; cnt < 4; cnt++) {
809 *(mtod(opts, caddr_t) + opts->m_len) =
810 IPOPT_EOL;
811 opts->m_len++;
812 }
813 }
814 #ifdef ICMPPRINTFS
815 if (icmpprintfs)
816 printf("%d\n", opts->m_len);
817 #endif
818 }
819 /*
820 * Now strip out original options by copying rest of first
821 * mbuf's data back, and adjust the IP length.
822 */
823 ip->ip_len -= optlen;
824 ip->ip_hl = sizeof(struct ip) >> 2;
825 m->m_len -= optlen;
826 if (m->m_flags & M_PKTHDR)
827 m->m_pkthdr.len -= optlen;
828 optlen += sizeof(struct ip);
829 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
830 (unsigned)(m->m_len - sizeof(struct ip)));
831 }
832 m->m_flags &= ~(M_BCAST|M_MCAST);
833 icmp_send(m, opts);
834 done:
835 if (opts)
836 (void)m_free(opts);
837 }
838
839 /*
840 * Send an icmp packet back to the ip level,
841 * after supplying a checksum.
842 */
843 void
844 icmp_send(m, opts)
845 struct mbuf *m;
846 struct mbuf *opts;
847 {
848 struct ip *ip = mtod(m, struct ip *);
849 int hlen;
850 struct icmp *icp;
851
852 hlen = ip->ip_hl << 2;
853 m->m_data += hlen;
854 m->m_len -= hlen;
855 icp = mtod(m, struct icmp *);
856 icp->icmp_cksum = 0;
857 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
858 m->m_data -= hlen;
859 m->m_len += hlen;
860 #ifdef ICMPPRINTFS
861 if (icmpprintfs)
862 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
863 #endif
864 #ifdef IPSEC
865 /* Don't lookup socket */
866 (void)ipsec_setsocket(m, NULL);
867 #endif
868 (void) ip_output(m, opts, NULL, 0, NULL);
869 }
870
871 n_time
872 iptime()
873 {
874 struct timeval atv;
875 u_long t;
876
877 microtime(&atv);
878 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
879 return (htonl(t));
880 }
881
882 int
883 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
884 int *name;
885 u_int namelen;
886 void *oldp;
887 size_t *oldlenp;
888 void *newp;
889 size_t newlen;
890 {
891 int arg, error;
892
893 /* All sysctl names at this level are terminal. */
894 if (namelen != 1)
895 return (ENOTDIR);
896
897 switch (name[0])
898 {
899 case ICMPCTL_MASKREPL:
900 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
901 break;
902 case ICMPCTL_RETURNDATABYTES:
903 arg = icmpreturndatabytes;
904 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
905 if (error)
906 break;
907 if ((arg >= 8) || (arg <= 512))
908 icmpreturndatabytes = arg;
909 else
910 error = EINVAL;
911 break;
912 case ICMPCTL_ERRPPSLIMIT:
913 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
914 break;
915 case ICMPCTL_REDIRACCEPT:
916 error = sysctl_int(oldp, oldlenp, newp, newlen,
917 &icmp_rediraccept);
918 break;
919 case ICMPCTL_REDIRTIMEOUT:
920 error = sysctl_int(oldp, oldlenp, newp, newlen,
921 &icmp_redirtimeout);
922 if (icmp_redirect_timeout_q != NULL) {
923 if (icmp_redirtimeout == 0) {
924 rt_timer_queue_destroy(icmp_redirect_timeout_q,
925 TRUE);
926 icmp_redirect_timeout_q = NULL;
927 } else {
928 rt_timer_queue_change(icmp_redirect_timeout_q,
929 icmp_redirtimeout);
930 }
931 } else if (icmp_redirtimeout > 0) {
932 icmp_redirect_timeout_q =
933 rt_timer_queue_create(icmp_redirtimeout);
934 }
935 return (error);
936
937 break;
938 default:
939 error = ENOPROTOOPT;
940 break;
941 }
942 return error;
943 }
944
/*
 * Table of common MTUs, in strictly decreasing order and terminated by
 * a 0 entry.  icmp_mtudisc() and ip_next_mtu() both scan it from the
 * top; the loop in ip_next_mtu() counts on the 0 terminator to stop.
 */

static const u_int mtu_table[] = {
	65535, 65280, 32000, 17914, 9180, 8166,
	4352, 2002, 1492, 1006, 508, 296, 68, 0
};
951
952 void
953 icmp_mtudisc(icp, faddr)
954 struct icmp *icp;
955 struct in_addr faddr;
956 {
957 struct icmp_mtudisc_callback *mc;
958 struct sockaddr *dst = sintosa(&icmpsrc);
959 struct rtentry *rt;
960 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
961 int error;
962
963 rt = rtalloc1(dst, 1);
964 if (rt == 0)
965 return;
966
967 /* If we didn't get a host route, allocate one */
968
969 if ((rt->rt_flags & RTF_HOST) == 0) {
970 struct rtentry *nrt;
971
972 error = rtrequest((int) RTM_ADD, dst,
973 (struct sockaddr *) rt->rt_gateway,
974 (struct sockaddr *) 0,
975 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
976 if (error) {
977 rtfree(rt);
978 return;
979 }
980 nrt->rt_rmx = rt->rt_rmx;
981 rtfree(rt);
982 rt = nrt;
983 }
984 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
985 if (error) {
986 rtfree(rt);
987 return;
988 }
989
990 if (mtu == 0) {
991 int i = 0;
992
993 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
994 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
995 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
996 mtu -= (icp->icmp_ip.ip_hl << 2);
997
998 /* If we still can't guess a value, try the route */
999
1000 if (mtu == 0) {
1001 mtu = rt->rt_rmx.rmx_mtu;
1002
1003 /* If no route mtu, default to the interface mtu */
1004
1005 if (mtu == 0)
1006 mtu = rt->rt_ifp->if_mtu;
1007 }
1008
1009 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1010 if (mtu > mtu_table[i]) {
1011 mtu = mtu_table[i];
1012 break;
1013 }
1014 }
1015
1016 /*
1017 * XXX: RTV_MTU is overloaded, since the admin can set it
1018 * to turn off PMTU for a route, and the kernel can
1019 * set it to indicate a serious problem with PMTU
1020 * on a route. We should be using a separate flag
1021 * for the kernel to indicate this.
1022 */
1023
1024 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1025 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1026 rt->rt_rmx.rmx_locks |= RTV_MTU;
1027 else if (rt->rt_rmx.rmx_mtu > mtu ||
1028 rt->rt_rmx.rmx_mtu == 0) {
1029 icmpstat.icps_pmtuchg++;
1030 rt->rt_rmx.rmx_mtu = mtu;
1031 }
1032 }
1033
1034 if (rt)
1035 rtfree(rt);
1036
1037 /*
1038 * Notify protocols that the MTU for this destination
1039 * has changed.
1040 */
1041 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1042 mc = LIST_NEXT(mc, mc_list))
1043 (*mc->mc_func)(faddr);
1044 }
1045
1046 /*
1047 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1048 * given current value MTU. If DIR is less than zero, a larger plateau
1049 * is returned; otherwise, a smaller value is returned.
1050 */
1051 int
1052 ip_next_mtu(mtu, dir) /* XXX */
1053 int mtu;
1054 int dir;
1055 {
1056 int i;
1057
1058 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1059 if (mtu >= mtu_table[i])
1060 break;
1061 }
1062
1063 if (dir < 0) {
1064 if (i == 0) {
1065 return 0;
1066 } else {
1067 return mtu_table[i - 1];
1068 }
1069 } else {
1070 if (mtu_table[i] == 0) {
1071 return 0;
1072 } else if (mtu > mtu_table[i]) {
1073 return mtu_table[i];
1074 } else {
1075 return mtu_table[i + 1];
1076 }
1077 }
1078 }
1079
1080 static void
1081 icmp_mtudisc_timeout(rt, r)
1082 struct rtentry *rt;
1083 struct rttimer *r;
1084 {
1085 if (rt == NULL)
1086 panic("icmp_mtudisc_timeout: bad route to timeout");
1087 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1088 (RTF_DYNAMIC | RTF_HOST)) {
1089 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1090 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1091 } else {
1092 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1093 rt->rt_rmx.rmx_mtu = 0;
1094 }
1095 }
1096 }
1097
1098 static void
1099 icmp_redirect_timeout(rt, r)
1100 struct rtentry *rt;
1101 struct rttimer *r;
1102 {
1103 if (rt == NULL)
1104 panic("icmp_redirect_timeout: bad route to timeout");
1105 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1106 (RTF_DYNAMIC | RTF_HOST)) {
1107 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1108 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1109 }
1110 }
1111
1112 /*
1113 * Perform rate limit check.
1114 * Returns 0 if it is okay to send the icmp packet.
1115 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1116 * limitation.
1117 *
1118 * XXX per-destination/type check necessary?
1119 */
1120 static int
1121 icmp_ratelimit(dst, type, code)
1122 const struct in_addr *dst;
1123 const int type; /* not used at this moment */
1124 const int code; /* not used at this moment */
1125 {
1126
1127 /* PPS limit */
1128 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1129 icmperrppslim)) {
1130 /* The packet is subject to rate limit */
1131 return 1;
1132 }
1133
1134 /*okay to send*/
1135 return 0;
1136 }
1137