ip_icmp.c revision 1.69 1 /* $NetBSD: ip_icmp.c,v 1.69 2002/06/30 22:40:34 thorpej Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include <sys/cdefs.h>
108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.69 2002/06/30 22:40:34 thorpej Exp $");
109
110 #include "opt_ipsec.h"
111
112 #include <sys/param.h>
113 #include <sys/systm.h>
114 #include <sys/malloc.h>
115 #include <sys/mbuf.h>
116 #include <sys/protosw.h>
117 #include <sys/socket.h>
118 #include <sys/time.h>
119 #include <sys/kernel.h>
120 #include <sys/syslog.h>
121 #include <sys/sysctl.h>
122
123 #include <net/if.h>
124 #include <net/route.h>
125
126 #include <netinet/in.h>
127 #include <netinet/in_systm.h>
128 #include <netinet/in_var.h>
129 #include <netinet/ip.h>
130 #include <netinet/ip_icmp.h>
131 #include <netinet/ip_var.h>
132 #include <netinet/in_pcb.h>
133 #include <netinet/icmp_var.h>
134
135 #ifdef IPSEC
136 #include <netinet6/ipsec.h>
137 #include <netkey/key.h>
138 #endif
139
140 #include <machine/stdarg.h>
141
142 /*
143 * ICMP routines: error generation, receive packet processing, and
144 * routines to turnaround packets back to the originator, and
145 * host table maintenance routines.
146 */
147
/*
 * Run-time tunables.
 *
 * icmpmaskrepl:	when zero, icmp_input() does not answer ICMP_MASKREQ.
 * icmpreturndatabytes:	number of bytes following the offending IP header
 *			that icmp_error() quotes in an error message.
 */
int	icmpmaskrepl = 0;
int	icmpreturndatabytes = 8;

#ifdef ICMPPRINTFS
/* Non-zero enables the debugging printf()s scattered through this file. */
int	icmpprintfs = 0;
#endif
153
154 /*
155 * List of callbacks to notify when Path MTU changes are made.
156 */
157 struct icmp_mtudisc_callback {
158 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
159 void (*mc_func) __P((struct in_addr));
160 };
161
162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
163 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
164
165 #if 0
166 static int ip_next_mtu __P((int, int));
167 #else
168 /*static*/ int ip_next_mtu __P((int, int));
169 #endif
170
171 extern int icmperrppslim;
172 static int icmperrpps_count = 0;
173 static struct timeval icmperrppslim_last;
174 static int icmp_rediraccept = 1;
175 static int icmp_redirtimeout = 600;
176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
177
178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
180
181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
182
183
184 void
185 icmp_init()
186 {
187 /*
188 * This is only useful if the user initializes redirtimeout to
189 * something other than zero.
190 */
191 if (icmp_redirtimeout != 0) {
192 icmp_redirect_timeout_q =
193 rt_timer_queue_create(icmp_redirtimeout);
194 }
195 }
196
197 /*
198 * Register a Path MTU Discovery callback.
199 */
200 void
201 icmp_mtudisc_callback_register(func)
202 void (*func) __P((struct in_addr));
203 {
204 struct icmp_mtudisc_callback *mc;
205
206 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
207 mc = LIST_NEXT(mc, mc_list)) {
208 if (mc->mc_func == func)
209 return;
210 }
211
212 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
213 if (mc == NULL)
214 panic("icmp_mtudisc_callback_register");
215
216 mc->mc_func = func;
217 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
218 }
219
220 /*
221 * Generate an error packet of type error
222 * in response to bad packet ip.
223 */
224 void
225 icmp_error(n, type, code, dest, destifp)
226 struct mbuf *n;
227 int type, code;
228 n_long dest;
229 struct ifnet *destifp;
230 {
231 struct ip *oip = mtod(n, struct ip *), *nip;
232 unsigned oiplen = oip->ip_hl << 2;
233 struct icmp *icp;
234 struct mbuf *m;
235 unsigned icmplen, mblen;
236
237 #ifdef ICMPPRINTFS
238 if (icmpprintfs)
239 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
240 #endif
241 if (type != ICMP_REDIRECT)
242 icmpstat.icps_error++;
243 /*
244 * Don't send error if the original packet was encrypted.
245 * Don't send error if not the first fragment of message.
246 * Don't error if the old packet protocol was ICMP
247 * error message, only known informational types.
248 */
249 if (n->m_flags & M_DECRYPTED)
250 goto freeit;
251 if (oip->ip_off &~ (IP_MF|IP_DF))
252 goto freeit;
253 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
254 n->m_len >= oiplen + ICMP_MINLEN &&
255 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
256 icmpstat.icps_oldicmp++;
257 goto freeit;
258 }
259 /* Don't send error in response to a multicast or broadcast packet */
260 if (n->m_flags & (M_BCAST|M_MCAST))
261 goto freeit;
262
263 /*
264 * First, do a rate limitation check.
265 */
266 if (icmp_ratelimit(&oip->ip_src, type, code)) {
267 /* XXX stat */
268 goto freeit;
269 }
270
271 /*
272 * Now, formulate icmp message
273 */
274 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
275 /*
276 * Defend against mbuf chains shorter than oip->ip_len:
277 */
278 mblen = 0;
279 for (m = n; m && (mblen < icmplen); m = m->m_next)
280 mblen += m->m_len;
281 icmplen = min(mblen, icmplen);
282
283 /*
284 * As we are not required to return everything we have,
285 * we return whatever we can return at ease.
286 *
287 * Note that ICMP datagrams longer than 576 octets are out of spec
288 * according to RFC1812; the limit on icmpreturndatabytes below in
289 * icmp_sysctl will keep things below that limit.
290 */
291
292 KASSERT(ICMP_MINLEN <= MCLBYTES);
293
294 if (icmplen + ICMP_MINLEN > MCLBYTES)
295 icmplen = MCLBYTES - ICMP_MINLEN;
296
297 m = m_gethdr(M_DONTWAIT, MT_HEADER);
298 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
299 MCLGET(m, M_DONTWAIT);
300 if ((m->m_flags & M_EXT) == 0) {
301 m_freem(m);
302 m = NULL;
303 }
304 }
305 if (m == NULL)
306 goto freeit;
307 m->m_len = icmplen + ICMP_MINLEN;
308 if ((m->m_flags & M_EXT) == 0)
309 MH_ALIGN(m, m->m_len);
310 icp = mtod(m, struct icmp *);
311 if ((u_int)type > ICMP_MAXTYPE)
312 panic("icmp_error");
313 icmpstat.icps_outhist[type]++;
314 icp->icmp_type = type;
315 if (type == ICMP_REDIRECT)
316 icp->icmp_gwaddr.s_addr = dest;
317 else {
318 icp->icmp_void = 0;
319 /*
320 * The following assignments assume an overlay with the
321 * zeroed icmp_void field.
322 */
323 if (type == ICMP_PARAMPROB) {
324 icp->icmp_pptr = code;
325 code = 0;
326 } else if (type == ICMP_UNREACH &&
327 code == ICMP_UNREACH_NEEDFRAG && destifp)
328 icp->icmp_nextmtu = htons(destifp->if_mtu);
329 }
330
331 HTONS(oip->ip_off);
332 HTONS(oip->ip_len);
333 icp->icmp_code = code;
334 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
335 nip = &icp->icmp_ip;
336
337 /*
338 * Now, copy old ip header (without options)
339 * in front of icmp message.
340 */
341 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
342 panic("icmp len");
343 m->m_data -= sizeof(struct ip);
344 m->m_len += sizeof(struct ip);
345 m->m_pkthdr.len = m->m_len;
346 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
347 nip = mtod(m, struct ip *);
348 /* ip_v set in ip_output */
349 nip->ip_hl = sizeof(struct ip) >> 2;
350 nip->ip_tos = 0;
351 nip->ip_len = m->m_len;
352 /* ip_id set in ip_output */
353 nip->ip_off = 0;
354 /* ip_ttl set in icmp_reflect */
355 nip->ip_p = IPPROTO_ICMP;
356 nip->ip_src = oip->ip_src;
357 nip->ip_dst = oip->ip_dst;
358 icmp_reflect(m);
359
360 freeit:
361 m_freem(n);
362 }
363
364 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
365 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
366 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
367 struct sockaddr_in icmpmask = { 8, 0 };
368
369 /*
370 * Process a received ICMP message.
371 */
372 void
373 #if __STDC__
374 icmp_input(struct mbuf *m, ...)
375 #else
376 icmp_input(m, va_alist)
377 struct mbuf *m;
378 va_dcl
379 #endif
380 {
381 int proto;
382 struct icmp *icp;
383 struct ip *ip = mtod(m, struct ip *);
384 int icmplen;
385 int i;
386 struct in_ifaddr *ia;
387 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
388 int code;
389 int hlen;
390 va_list ap;
391 struct rtentry *rt;
392
393 va_start(ap, m);
394 hlen = va_arg(ap, int);
395 proto = va_arg(ap, int);
396 va_end(ap);
397
398 /*
399 * Locate icmp structure in mbuf, and check
400 * that not corrupted and of at least minimum length.
401 */
402 icmplen = ip->ip_len - hlen;
403 #ifdef ICMPPRINTFS
404 if (icmpprintfs)
405 printf("icmp_input from %x to %x, len %d\n",
406 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
407 icmplen);
408 #endif
409 if (icmplen < ICMP_MINLEN) {
410 icmpstat.icps_tooshort++;
411 goto freeit;
412 }
413 i = hlen + min(icmplen, ICMP_ADVLENMIN);
414 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
415 icmpstat.icps_tooshort++;
416 return;
417 }
418 ip = mtod(m, struct ip *);
419 m->m_len -= hlen;
420 m->m_data += hlen;
421 icp = mtod(m, struct icmp *);
422 /* Don't need to assert alignment, here. */
423 if (in_cksum(m, icmplen)) {
424 icmpstat.icps_checksum++;
425 goto freeit;
426 }
427 m->m_len += hlen;
428 m->m_data -= hlen;
429
430 #ifdef ICMPPRINTFS
431 /*
432 * Message type specific processing.
433 */
434 if (icmpprintfs)
435 printf("icmp_input, type %d code %d\n", icp->icmp_type,
436 icp->icmp_code);
437 #endif
438 if (icp->icmp_type > ICMP_MAXTYPE)
439 goto raw;
440 icmpstat.icps_inhist[icp->icmp_type]++;
441 code = icp->icmp_code;
442 switch (icp->icmp_type) {
443
444 case ICMP_UNREACH:
445 switch (code) {
446 case ICMP_UNREACH_NET:
447 case ICMP_UNREACH_HOST:
448 case ICMP_UNREACH_PROTOCOL:
449 case ICMP_UNREACH_PORT:
450 case ICMP_UNREACH_SRCFAIL:
451 code += PRC_UNREACH_NET;
452 break;
453
454 case ICMP_UNREACH_NEEDFRAG:
455 code = PRC_MSGSIZE;
456 break;
457
458 case ICMP_UNREACH_NET_UNKNOWN:
459 case ICMP_UNREACH_NET_PROHIB:
460 case ICMP_UNREACH_TOSNET:
461 code = PRC_UNREACH_NET;
462 break;
463
464 case ICMP_UNREACH_HOST_UNKNOWN:
465 case ICMP_UNREACH_ISOLATED:
466 case ICMP_UNREACH_HOST_PROHIB:
467 case ICMP_UNREACH_TOSHOST:
468 code = PRC_UNREACH_HOST;
469 break;
470
471 default:
472 goto badcode;
473 }
474 goto deliver;
475
476 case ICMP_TIMXCEED:
477 if (code > 1)
478 goto badcode;
479 code += PRC_TIMXCEED_INTRANS;
480 goto deliver;
481
482 case ICMP_PARAMPROB:
483 if (code > 1)
484 goto badcode;
485 code = PRC_PARAMPROB;
486 goto deliver;
487
488 case ICMP_SOURCEQUENCH:
489 if (code)
490 goto badcode;
491 code = PRC_QUENCH;
492 goto deliver;
493
494 deliver:
495 /*
496 * Problem with datagram; advise higher level routines.
497 */
498 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
499 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
500 icmpstat.icps_badlen++;
501 goto freeit;
502 }
503 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
504 goto badcode;
505 NTOHS(icp->icmp_ip.ip_len);
506 #ifdef ICMPPRINTFS
507 if (icmpprintfs)
508 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
509 #endif
510 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
511 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
512 if (ctlfunc)
513 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
514 &icp->icmp_ip);
515 break;
516
517 badcode:
518 icmpstat.icps_badcode++;
519 break;
520
521 case ICMP_ECHO:
522 icp->icmp_type = ICMP_ECHOREPLY;
523 goto reflect;
524
525 case ICMP_TSTAMP:
526 if (icmplen < ICMP_TSLEN) {
527 icmpstat.icps_badlen++;
528 break;
529 }
530 icp->icmp_type = ICMP_TSTAMPREPLY;
531 icp->icmp_rtime = iptime();
532 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
533 goto reflect;
534
535 case ICMP_MASKREQ:
536 if (icmpmaskrepl == 0)
537 break;
538 /*
539 * We are not able to respond with all ones broadcast
540 * unless we receive it over a point-to-point interface.
541 */
542 if (icmplen < ICMP_MASKLEN) {
543 icmpstat.icps_badlen++;
544 break;
545 }
546 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
547 in_nullhost(ip->ip_dst))
548 icmpdst.sin_addr = ip->ip_src;
549 else
550 icmpdst.sin_addr = ip->ip_dst;
551 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
552 m->m_pkthdr.rcvif));
553 if (ia == 0)
554 break;
555 icp->icmp_type = ICMP_MASKREPLY;
556 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
557 if (in_nullhost(ip->ip_src)) {
558 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
559 ip->ip_src = ia->ia_broadaddr.sin_addr;
560 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
561 ip->ip_src = ia->ia_dstaddr.sin_addr;
562 }
563 reflect:
564 icmpstat.icps_reflect++;
565 icmpstat.icps_outhist[icp->icmp_type]++;
566 icmp_reflect(m);
567 return;
568
569 case ICMP_REDIRECT:
570 if (code > 3)
571 goto badcode;
572 if (icmp_rediraccept == 0)
573 goto freeit;
574 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
575 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
576 icmpstat.icps_badlen++;
577 break;
578 }
579 /*
580 * Short circuit routing redirects to force
581 * immediate change in the kernel's routing
582 * tables. The message is also handed to anyone
583 * listening on a raw socket (e.g. the routing
584 * daemon for use in updating its tables).
585 */
586 icmpgw.sin_addr = ip->ip_src;
587 icmpdst.sin_addr = icp->icmp_gwaddr;
588 #ifdef ICMPPRINTFS
589 if (icmpprintfs)
590 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
591 icp->icmp_gwaddr);
592 #endif
593 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
594 rt = NULL;
595 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
596 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
597 sintosa(&icmpgw), (struct rtentry **)&rt);
598 if (rt != NULL && icmp_redirtimeout != 0) {
599 i = rt_timer_add(rt, icmp_redirect_timeout,
600 icmp_redirect_timeout_q);
601 if (i)
602 log(LOG_ERR, "ICMP: redirect failed to "
603 "register timeout for route to %x, "
604 "code %d\n",
605 icp->icmp_ip.ip_dst.s_addr, i);
606 }
607 if (rt != NULL)
608 rtfree(rt);
609
610 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
611 #ifdef IPSEC
612 key_sa_routechange((struct sockaddr *)&icmpsrc);
613 #endif
614 break;
615
616 /*
617 * No kernel processing for the following;
618 * just fall through to send to raw listener.
619 */
620 case ICMP_ECHOREPLY:
621 case ICMP_ROUTERADVERT:
622 case ICMP_ROUTERSOLICIT:
623 case ICMP_TSTAMPREPLY:
624 case ICMP_IREQREPLY:
625 case ICMP_MASKREPLY:
626 default:
627 break;
628 }
629
630 raw:
631 rip_input(m, hlen, proto);
632 return;
633
634 freeit:
635 m_freem(m);
636 return;
637 }
638
639 /*
640 * Reflect the ip packet back to the source
641 */
642 void
643 icmp_reflect(m)
644 struct mbuf *m;
645 {
646 struct ip *ip = mtod(m, struct ip *);
647 struct in_ifaddr *ia;
648 struct ifaddr *ifa;
649 struct sockaddr_in *sin = 0;
650 struct in_addr t;
651 struct mbuf *opts = 0;
652 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
653
654 if (!in_canforward(ip->ip_src) &&
655 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
656 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
657 m_freem(m); /* Bad return address */
658 goto done; /* ip_output() will check for broadcast */
659 }
660 t = ip->ip_dst;
661 ip->ip_dst = ip->ip_src;
662 /*
663 * If the incoming packet was addressed directly to us, use
664 * dst as the src for the reply. Otherwise (broadcast or
665 * anonymous), use an address which corresponds to the
666 * incoming interface, with a preference for the address which
667 * corresponds to the route to the destination of the ICMP.
668 */
669
670 /* Look for packet addressed to us */
671 INADDR_TO_IA(t, ia);
672
673 /* look for packet sent to broadcast address */
674 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
675 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
676 if (ifa->ifa_addr->sa_family != AF_INET)
677 continue;
678 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
679 ia = ifatoia(ifa);
680 break;
681 }
682 }
683 }
684
685 if (ia)
686 sin = &ia->ia_addr;
687
688 icmpdst.sin_addr = t;
689
690 /* if the packet is addressed somewhere else, compute the
691 source address for packets routed back to the source, and
692 use that, if it's an address on the interface which
693 received the packet */
694 if (sin == (struct sockaddr_in *)0) {
695 struct sockaddr_in sin_dst;
696 struct route icmproute;
697 int errornum;
698
699 sin_dst.sin_family = AF_INET;
700 sin_dst.sin_len = sizeof(struct sockaddr_in);
701 sin_dst.sin_addr = ip->ip_dst;
702 bzero(&icmproute, sizeof(icmproute));
703 errornum = 0;
704 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
705 /* errornum is never used */
706 if (icmproute.ro_rt)
707 RTFREE(icmproute.ro_rt);
708 /* check to make sure sin is a source address on rcvif */
709 if (sin) {
710 t = sin->sin_addr;
711 sin = (struct sockaddr_in *)0;
712 INADDR_TO_IA(t, ia);
713 while (ia) {
714 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
715 sin = &ia->ia_addr;
716 break;
717 }
718 NEXT_IA_WITH_SAME_ADDR(ia);
719 }
720 }
721 }
722
723 /* if it was not addressed to us, but the route doesn't go out
724 the source interface, pick an address on the source
725 interface. This can happen when routing is asymmetric, or
726 when the incoming packet was encapsulated */
727 if (sin == (struct sockaddr_in *)0) {
728 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
729 if (ifa->ifa_addr->sa_family != AF_INET)
730 continue;
731 sin = &(ifatoia(ifa)->ia_addr);
732 break;
733 }
734 }
735
736 /*
737 * The following happens if the packet was not addressed to us,
738 * and was received on an interface with no IP address:
739 * We find the first AF_INET address on the first non-loopback
740 * interface.
741 */
742 if (sin == (struct sockaddr_in *)0)
743 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
744 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
745 continue;
746 sin = &ia->ia_addr;
747 break;
748 }
749
750 /*
751 * If we still didn't find an address, punt. We could have an
752 * interface up (and receiving packets) with no address.
753 */
754 if (sin == (struct sockaddr_in *)0) {
755 m_freem(m);
756 goto done;
757 }
758
759 ip->ip_src = sin->sin_addr;
760 ip->ip_ttl = MAXTTL;
761
762 if (optlen > 0) {
763 u_char *cp;
764 int opt, cnt;
765 u_int len;
766
767 /*
768 * Retrieve any source routing from the incoming packet;
769 * add on any record-route or timestamp options.
770 */
771 cp = (u_char *) (ip + 1);
772 if ((opts = ip_srcroute()) == 0 &&
773 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
774 opts->m_len = sizeof(struct in_addr);
775 *mtod(opts, struct in_addr *) = zeroin_addr;
776 }
777 if (opts) {
778 #ifdef ICMPPRINTFS
779 if (icmpprintfs)
780 printf("icmp_reflect optlen %d rt %d => ",
781 optlen, opts->m_len);
782 #endif
783 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
784 opt = cp[IPOPT_OPTVAL];
785 if (opt == IPOPT_EOL)
786 break;
787 if (opt == IPOPT_NOP)
788 len = 1;
789 else {
790 if (cnt < IPOPT_OLEN + sizeof(*cp))
791 break;
792 len = cp[IPOPT_OLEN];
793 if (len < IPOPT_OLEN + sizeof(*cp) ||
794 len > cnt)
795 break;
796 }
797 /*
798 * Should check for overflow, but it "can't happen"
799 */
800 if (opt == IPOPT_RR || opt == IPOPT_TS ||
801 opt == IPOPT_SECURITY) {
802 bcopy((caddr_t)cp,
803 mtod(opts, caddr_t) + opts->m_len, len);
804 opts->m_len += len;
805 }
806 }
807 /* Terminate & pad, if necessary */
808 if ((cnt = opts->m_len % 4) != 0) {
809 for (; cnt < 4; cnt++) {
810 *(mtod(opts, caddr_t) + opts->m_len) =
811 IPOPT_EOL;
812 opts->m_len++;
813 }
814 }
815 #ifdef ICMPPRINTFS
816 if (icmpprintfs)
817 printf("%d\n", opts->m_len);
818 #endif
819 }
820 /*
821 * Now strip out original options by copying rest of first
822 * mbuf's data back, and adjust the IP length.
823 */
824 ip->ip_len -= optlen;
825 ip->ip_hl = sizeof(struct ip) >> 2;
826 m->m_len -= optlen;
827 if (m->m_flags & M_PKTHDR)
828 m->m_pkthdr.len -= optlen;
829 optlen += sizeof(struct ip);
830 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
831 (unsigned)(m->m_len - sizeof(struct ip)));
832 }
833 m->m_flags &= ~(M_BCAST|M_MCAST);
834 icmp_send(m, opts);
835 done:
836 if (opts)
837 (void)m_free(opts);
838 }
839
840 /*
841 * Send an icmp packet back to the ip level,
842 * after supplying a checksum.
843 */
844 void
845 icmp_send(m, opts)
846 struct mbuf *m;
847 struct mbuf *opts;
848 {
849 struct ip *ip = mtod(m, struct ip *);
850 int hlen;
851 struct icmp *icp;
852
853 hlen = ip->ip_hl << 2;
854 m->m_data += hlen;
855 m->m_len -= hlen;
856 icp = mtod(m, struct icmp *);
857 icp->icmp_cksum = 0;
858 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
859 m->m_data -= hlen;
860 m->m_len += hlen;
861 #ifdef ICMPPRINTFS
862 if (icmpprintfs)
863 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
864 #endif
865 #ifdef IPSEC
866 /* Don't lookup socket */
867 (void)ipsec_setsocket(m, NULL);
868 #endif
869 (void) ip_output(m, opts, NULL, 0, NULL);
870 }
871
872 n_time
873 iptime()
874 {
875 struct timeval atv;
876 u_long t;
877
878 microtime(&atv);
879 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
880 return (htonl(t));
881 }
882
883 int
884 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
885 int *name;
886 u_int namelen;
887 void *oldp;
888 size_t *oldlenp;
889 void *newp;
890 size_t newlen;
891 {
892 int arg, error;
893
894 /* All sysctl names at this level are terminal. */
895 if (namelen != 1)
896 return (ENOTDIR);
897
898 switch (name[0])
899 {
900 case ICMPCTL_MASKREPL:
901 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
902 break;
903 case ICMPCTL_RETURNDATABYTES:
904 arg = icmpreturndatabytes;
905 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
906 if (error)
907 break;
908 if ((arg >= 8) || (arg <= 512))
909 icmpreturndatabytes = arg;
910 else
911 error = EINVAL;
912 break;
913 case ICMPCTL_ERRPPSLIMIT:
914 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
915 break;
916 case ICMPCTL_REDIRACCEPT:
917 error = sysctl_int(oldp, oldlenp, newp, newlen,
918 &icmp_rediraccept);
919 break;
920 case ICMPCTL_REDIRTIMEOUT:
921 error = sysctl_int(oldp, oldlenp, newp, newlen,
922 &icmp_redirtimeout);
923 if (icmp_redirect_timeout_q != NULL) {
924 if (icmp_redirtimeout == 0) {
925 rt_timer_queue_destroy(icmp_redirect_timeout_q,
926 TRUE);
927 icmp_redirect_timeout_q = NULL;
928 } else {
929 rt_timer_queue_change(icmp_redirect_timeout_q,
930 icmp_redirtimeout);
931 }
932 } else if (icmp_redirtimeout > 0) {
933 icmp_redirect_timeout_q =
934 rt_timer_queue_create(icmp_redirtimeout);
935 }
936 return (error);
937
938 break;
939 default:
940 error = ENOPROTOOPT;
941 break;
942 }
943 return error;
944 }
945
946 /* Table of common MTUs: */
947
948 static const u_int mtu_table[] = {
949 65535, 65280, 32000, 17914, 9180, 8166,
950 4352, 2002, 1492, 1006, 508, 296, 68, 0
951 };
952
953 void
954 icmp_mtudisc(icp, faddr)
955 struct icmp *icp;
956 struct in_addr faddr;
957 {
958 struct icmp_mtudisc_callback *mc;
959 struct sockaddr *dst = sintosa(&icmpsrc);
960 struct rtentry *rt;
961 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
962 int error;
963
964 rt = rtalloc1(dst, 1);
965 if (rt == 0)
966 return;
967
968 /* If we didn't get a host route, allocate one */
969
970 if ((rt->rt_flags & RTF_HOST) == 0) {
971 struct rtentry *nrt;
972
973 error = rtrequest((int) RTM_ADD, dst,
974 (struct sockaddr *) rt->rt_gateway,
975 (struct sockaddr *) 0,
976 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
977 if (error) {
978 rtfree(rt);
979 return;
980 }
981 nrt->rt_rmx = rt->rt_rmx;
982 rtfree(rt);
983 rt = nrt;
984 }
985 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
986 if (error) {
987 rtfree(rt);
988 return;
989 }
990
991 if (mtu == 0) {
992 int i = 0;
993
994 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
995 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
996 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
997 mtu -= (icp->icmp_ip.ip_hl << 2);
998
999 /* If we still can't guess a value, try the route */
1000
1001 if (mtu == 0) {
1002 mtu = rt->rt_rmx.rmx_mtu;
1003
1004 /* If no route mtu, default to the interface mtu */
1005
1006 if (mtu == 0)
1007 mtu = rt->rt_ifp->if_mtu;
1008 }
1009
1010 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1011 if (mtu > mtu_table[i]) {
1012 mtu = mtu_table[i];
1013 break;
1014 }
1015 }
1016
1017 /*
1018 * XXX: RTV_MTU is overloaded, since the admin can set it
1019 * to turn off PMTU for a route, and the kernel can
1020 * set it to indicate a serious problem with PMTU
1021 * on a route. We should be using a separate flag
1022 * for the kernel to indicate this.
1023 */
1024
1025 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1026 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1027 rt->rt_rmx.rmx_locks |= RTV_MTU;
1028 else if (rt->rt_rmx.rmx_mtu > mtu ||
1029 rt->rt_rmx.rmx_mtu == 0) {
1030 icmpstat.icps_pmtuchg++;
1031 rt->rt_rmx.rmx_mtu = mtu;
1032 }
1033 }
1034
1035 if (rt)
1036 rtfree(rt);
1037
1038 /*
1039 * Notify protocols that the MTU for this destination
1040 * has changed.
1041 */
1042 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1043 mc = LIST_NEXT(mc, mc_list))
1044 (*mc->mc_func)(faddr);
1045 }
1046
1047 /*
1048 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1049 * given current value MTU. If DIR is less than zero, a larger plateau
1050 * is returned; otherwise, a smaller value is returned.
1051 */
1052 int
1053 ip_next_mtu(mtu, dir) /* XXX */
1054 int mtu;
1055 int dir;
1056 {
1057 int i;
1058
1059 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1060 if (mtu >= mtu_table[i])
1061 break;
1062 }
1063
1064 if (dir < 0) {
1065 if (i == 0) {
1066 return 0;
1067 } else {
1068 return mtu_table[i - 1];
1069 }
1070 } else {
1071 if (mtu_table[i] == 0) {
1072 return 0;
1073 } else if (mtu > mtu_table[i]) {
1074 return mtu_table[i];
1075 } else {
1076 return mtu_table[i + 1];
1077 }
1078 }
1079 }
1080
1081 static void
1082 icmp_mtudisc_timeout(rt, r)
1083 struct rtentry *rt;
1084 struct rttimer *r;
1085 {
1086 if (rt == NULL)
1087 panic("icmp_mtudisc_timeout: bad route to timeout");
1088 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1089 (RTF_DYNAMIC | RTF_HOST)) {
1090 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1091 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1092 } else {
1093 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1094 rt->rt_rmx.rmx_mtu = 0;
1095 }
1096 }
1097 }
1098
1099 static void
1100 icmp_redirect_timeout(rt, r)
1101 struct rtentry *rt;
1102 struct rttimer *r;
1103 {
1104 if (rt == NULL)
1105 panic("icmp_redirect_timeout: bad route to timeout");
1106 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1107 (RTF_DYNAMIC | RTF_HOST)) {
1108 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1109 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1110 }
1111 }
1112
1113 /*
1114 * Perform rate limit check.
1115 * Returns 0 if it is okay to send the icmp packet.
1116 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1117 * limitation.
1118 *
1119 * XXX per-destination/type check necessary?
1120 */
1121 static int
1122 icmp_ratelimit(dst, type, code)
1123 const struct in_addr *dst;
1124 const int type; /* not used at this moment */
1125 const int code; /* not used at this moment */
1126 {
1127
1128 /* PPS limit */
1129 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1130 icmperrppslim)) {
1131 /* The packet is subject to rate limit */
1132 return 1;
1133 }
1134
1135 /*okay to send*/
1136 return 0;
1137 }
1138