ip_icmp.c revision 1.63 1 /* $NetBSD: ip_icmp.c,v 1.63 2001/10/30 06:41:10 kml Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include "opt_ipsec.h"
108
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/malloc.h>
112 #include <sys/mbuf.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/time.h>
116 #include <sys/kernel.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119
120 #include <net/if.h>
121 #include <net/route.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_systm.h>
125 #include <netinet/in_var.h>
126 #include <netinet/ip.h>
127 #include <netinet/ip_icmp.h>
128 #include <netinet/ip_var.h>
129 #include <netinet/in_pcb.h>
130 #include <netinet/icmp_var.h>
131
132 #ifdef IPSEC
133 #include <netinet6/ipsec.h>
134 #include <netkey/key.h>
135 #endif
136
137 #include <machine/stdarg.h>
138
/*
 * ICMP routines: generation of error messages, processing of received
 * ICMP packets, reflection of packets back to their originator, and
 * maintenance of routing state learned from ICMP.
 */

/* Non-zero: icmp_input() answers ICMP_MASKREQ; zero: requests are ignored. */
int	icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
/* Non-zero enables the debugging printf()s scattered through this file. */
int	icmpprintfs = 0;
#endif
/*
 * Upper bound on the number of bytes of the offending datagram, beyond
 * its IP header, that icmp_error() copies into the error message.
 */
int	icmpreturndatabytes = 8;
150
151 /*
152 * List of callbacks to notify when Path MTU changes are made.
153 */
154 struct icmp_mtudisc_callback {
155 LIST_ENTRY(icmp_mtudisc_callback) mc_list;
156 void (*mc_func) __P((struct in_addr));
157 };
158
159 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
160 LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
161
162 #if 0
163 static int ip_next_mtu __P((int, int));
164 #else
165 /*static*/ int ip_next_mtu __P((int, int));
166 #endif
167
168 extern int icmperrppslim;
169 static int icmperrpps_count = 0;
170 static struct timeval icmperrppslim_last;
171 static int icmp_rediraccept = 1;
172 static int icmp_redirtimeout = 0;
173 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
174
175 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
176 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
177
178 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
179
180
181 void
182 icmp_init()
183 {
184 /*
185 * This is only useful if the user initializes redirtimeout to
186 * something other than zero.
187 */
188 if (icmp_redirtimeout != 0) {
189 icmp_redirect_timeout_q =
190 rt_timer_queue_create(icmp_redirtimeout);
191 }
192 }
193
194 /*
195 * Register a Path MTU Discovery callback.
196 */
197 void
198 icmp_mtudisc_callback_register(func)
199 void (*func) __P((struct in_addr));
200 {
201 struct icmp_mtudisc_callback *mc;
202
203 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
204 mc = LIST_NEXT(mc, mc_list)) {
205 if (mc->mc_func == func)
206 return;
207 }
208
209 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
210 if (mc == NULL)
211 panic("icmp_mtudisc_callback_register");
212
213 mc->mc_func = func;
214 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
215 }
216
217 /*
218 * Generate an error packet of type error
219 * in response to bad packet ip.
220 */
221 void
222 icmp_error(n, type, code, dest, destifp)
223 struct mbuf *n;
224 int type, code;
225 n_long dest;
226 struct ifnet *destifp;
227 {
228 struct ip *oip = mtod(n, struct ip *), *nip;
229 unsigned oiplen = oip->ip_hl << 2;
230 struct icmp *icp;
231 struct mbuf *m;
232 unsigned icmplen, mblen;
233
234 #ifdef ICMPPRINTFS
235 if (icmpprintfs)
236 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
237 #endif
238 if (type != ICMP_REDIRECT)
239 icmpstat.icps_error++;
240 /*
241 * Don't send error if the original packet was encrypted.
242 * Don't send error if not the first fragment of message.
243 * Don't error if the old packet protocol was ICMP
244 * error message, only known informational types.
245 */
246 if (n->m_flags & M_DECRYPTED)
247 goto freeit;
248 if (oip->ip_off &~ (IP_MF|IP_DF))
249 goto freeit;
250 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
251 n->m_len >= oiplen + ICMP_MINLEN &&
252 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
253 icmpstat.icps_oldicmp++;
254 goto freeit;
255 }
256 /* Don't send error in response to a multicast or broadcast packet */
257 if (n->m_flags & (M_BCAST|M_MCAST))
258 goto freeit;
259
260 /*
261 * First, do a rate limitation check.
262 */
263 if (icmp_ratelimit(&oip->ip_src, type, code)) {
264 /* XXX stat */
265 goto freeit;
266 }
267
268 /*
269 * Now, formulate icmp message
270 */
271 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
272 /*
273 * Defend against mbuf chains shorter than oip->ip_len:
274 */
275 mblen = 0;
276 for (m = n; m && (mblen < icmplen); m = m->m_next)
277 mblen += m->m_len;
278 icmplen = min(mblen, icmplen);
279
280 /*
281 * As we are not required to return everything we have,
282 * we return whatever we can return at ease.
283 *
284 * Note that ICMP datagrams longer than 576 octets are out of spec
285 * according to RFC1812; the limit on icmpreturndatabytes below in
286 * icmp_sysctl will keep things below that limit.
287 */
288
289 KASSERT(ICMP_MINLEN <= MCLBYTES);
290
291 if (icmplen + ICMP_MINLEN > MCLBYTES)
292 icmplen = MCLBYTES - ICMP_MINLEN;
293
294 m = m_gethdr(M_DONTWAIT, MT_HEADER);
295 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
296 MCLGET(m, M_DONTWAIT);
297 if ((m->m_flags & M_EXT) == 0) {
298 m_freem(m);
299 m = NULL;
300 }
301 }
302 if (m == NULL)
303 goto freeit;
304 m->m_len = icmplen + ICMP_MINLEN;
305 if ((m->m_flags & M_EXT) == 0)
306 MH_ALIGN(m, m->m_len);
307 icp = mtod(m, struct icmp *);
308 if ((u_int)type > ICMP_MAXTYPE)
309 panic("icmp_error");
310 icmpstat.icps_outhist[type]++;
311 icp->icmp_type = type;
312 if (type == ICMP_REDIRECT)
313 icp->icmp_gwaddr.s_addr = dest;
314 else {
315 icp->icmp_void = 0;
316 /*
317 * The following assignments assume an overlay with the
318 * zeroed icmp_void field.
319 */
320 if (type == ICMP_PARAMPROB) {
321 icp->icmp_pptr = code;
322 code = 0;
323 } else if (type == ICMP_UNREACH &&
324 code == ICMP_UNREACH_NEEDFRAG && destifp)
325 icp->icmp_nextmtu = htons(destifp->if_mtu);
326 }
327
328 HTONS(oip->ip_off);
329 HTONS(oip->ip_len);
330 icp->icmp_code = code;
331 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
332 nip = &icp->icmp_ip;
333
334 /*
335 * Now, copy old ip header (without options)
336 * in front of icmp message.
337 */
338 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
339 panic("icmp len");
340 m->m_data -= sizeof(struct ip);
341 m->m_len += sizeof(struct ip);
342 m->m_pkthdr.len = m->m_len;
343 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
344 nip = mtod(m, struct ip *);
345 /* ip_v set in ip_output */
346 nip->ip_hl = sizeof(struct ip) >> 2;
347 nip->ip_tos = 0;
348 nip->ip_len = m->m_len;
349 /* ip_id set in ip_output */
350 nip->ip_off = 0;
351 /* ip_ttl set in icmp_reflect */
352 nip->ip_p = IPPROTO_ICMP;
353 nip->ip_src = oip->ip_src;
354 nip->ip_dst = oip->ip_dst;
355 icmp_reflect(m);
356
357 freeit:
358 m_freem(n);
359 }
360
361 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
362 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
363 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
364 struct sockaddr_in icmpmask = { 8, 0 };
365
366 /*
367 * Process a received ICMP message.
368 */
369 void
370 #if __STDC__
371 icmp_input(struct mbuf *m, ...)
372 #else
373 icmp_input(m, va_alist)
374 struct mbuf *m;
375 va_dcl
376 #endif
377 {
378 int proto;
379 struct icmp *icp;
380 struct ip *ip = mtod(m, struct ip *);
381 int icmplen;
382 int i;
383 struct in_ifaddr *ia;
384 void *(*ctlfunc) __P((int, struct sockaddr *, void *));
385 int code;
386 int hlen;
387 va_list ap;
388 struct rtentry *rt;
389
390 va_start(ap, m);
391 hlen = va_arg(ap, int);
392 proto = va_arg(ap, int);
393 va_end(ap);
394
395 /*
396 * Locate icmp structure in mbuf, and check
397 * that not corrupted and of at least minimum length.
398 */
399 icmplen = ip->ip_len - hlen;
400 #ifdef ICMPPRINTFS
401 if (icmpprintfs)
402 printf("icmp_input from %x to %x, len %d\n",
403 ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
404 icmplen);
405 #endif
406 if (icmplen < ICMP_MINLEN) {
407 icmpstat.icps_tooshort++;
408 goto freeit;
409 }
410 i = hlen + min(icmplen, ICMP_ADVLENMIN);
411 if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
412 icmpstat.icps_tooshort++;
413 return;
414 }
415 ip = mtod(m, struct ip *);
416 m->m_len -= hlen;
417 m->m_data += hlen;
418 icp = mtod(m, struct icmp *);
419 if (in_cksum(m, icmplen)) {
420 icmpstat.icps_checksum++;
421 goto freeit;
422 }
423 m->m_len += hlen;
424 m->m_data -= hlen;
425
426 #ifdef ICMPPRINTFS
427 /*
428 * Message type specific processing.
429 */
430 if (icmpprintfs)
431 printf("icmp_input, type %d code %d\n", icp->icmp_type,
432 icp->icmp_code);
433 #endif
434 if (icp->icmp_type > ICMP_MAXTYPE)
435 goto raw;
436 icmpstat.icps_inhist[icp->icmp_type]++;
437 code = icp->icmp_code;
438 switch (icp->icmp_type) {
439
440 case ICMP_UNREACH:
441 switch (code) {
442 case ICMP_UNREACH_NET:
443 case ICMP_UNREACH_HOST:
444 case ICMP_UNREACH_PROTOCOL:
445 case ICMP_UNREACH_PORT:
446 case ICMP_UNREACH_SRCFAIL:
447 code += PRC_UNREACH_NET;
448 break;
449
450 case ICMP_UNREACH_NEEDFRAG:
451 code = PRC_MSGSIZE;
452 break;
453
454 case ICMP_UNREACH_NET_UNKNOWN:
455 case ICMP_UNREACH_NET_PROHIB:
456 case ICMP_UNREACH_TOSNET:
457 code = PRC_UNREACH_NET;
458 break;
459
460 case ICMP_UNREACH_HOST_UNKNOWN:
461 case ICMP_UNREACH_ISOLATED:
462 case ICMP_UNREACH_HOST_PROHIB:
463 case ICMP_UNREACH_TOSHOST:
464 code = PRC_UNREACH_HOST;
465 break;
466
467 default:
468 goto badcode;
469 }
470 goto deliver;
471
472 case ICMP_TIMXCEED:
473 if (code > 1)
474 goto badcode;
475 code += PRC_TIMXCEED_INTRANS;
476 goto deliver;
477
478 case ICMP_PARAMPROB:
479 if (code > 1)
480 goto badcode;
481 code = PRC_PARAMPROB;
482 goto deliver;
483
484 case ICMP_SOURCEQUENCH:
485 if (code)
486 goto badcode;
487 code = PRC_QUENCH;
488 goto deliver;
489
490 deliver:
491 /*
492 * Problem with datagram; advise higher level routines.
493 */
494 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
495 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
496 icmpstat.icps_badlen++;
497 goto freeit;
498 }
499 if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
500 goto badcode;
501 NTOHS(icp->icmp_ip.ip_len);
502 #ifdef ICMPPRINTFS
503 if (icmpprintfs)
504 printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
505 #endif
506 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
507 ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
508 if (ctlfunc)
509 (void) (*ctlfunc)(code, sintosa(&icmpsrc),
510 &icp->icmp_ip);
511 break;
512
513 badcode:
514 icmpstat.icps_badcode++;
515 break;
516
517 case ICMP_ECHO:
518 icp->icmp_type = ICMP_ECHOREPLY;
519 goto reflect;
520
521 case ICMP_TSTAMP:
522 if (icmplen < ICMP_TSLEN) {
523 icmpstat.icps_badlen++;
524 break;
525 }
526 icp->icmp_type = ICMP_TSTAMPREPLY;
527 icp->icmp_rtime = iptime();
528 icp->icmp_ttime = icp->icmp_rtime; /* bogus, do later! */
529 goto reflect;
530
531 case ICMP_MASKREQ:
532 if (icmpmaskrepl == 0)
533 break;
534 /*
535 * We are not able to respond with all ones broadcast
536 * unless we receive it over a point-to-point interface.
537 */
538 if (icmplen < ICMP_MASKLEN) {
539 icmpstat.icps_badlen++;
540 break;
541 }
542 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
543 in_nullhost(ip->ip_dst))
544 icmpdst.sin_addr = ip->ip_src;
545 else
546 icmpdst.sin_addr = ip->ip_dst;
547 ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
548 m->m_pkthdr.rcvif));
549 if (ia == 0)
550 break;
551 icp->icmp_type = ICMP_MASKREPLY;
552 icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
553 if (in_nullhost(ip->ip_src)) {
554 if (ia->ia_ifp->if_flags & IFF_BROADCAST)
555 ip->ip_src = ia->ia_broadaddr.sin_addr;
556 else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
557 ip->ip_src = ia->ia_dstaddr.sin_addr;
558 }
559 reflect:
560 icmpstat.icps_reflect++;
561 icmpstat.icps_outhist[icp->icmp_type]++;
562 icmp_reflect(m);
563 return;
564
565 case ICMP_REDIRECT:
566 if (code > 3)
567 goto badcode;
568 if (icmp_rediraccept == 0)
569 goto freeit;
570 if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
571 icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
572 icmpstat.icps_badlen++;
573 break;
574 }
575 /*
576 * Short circuit routing redirects to force
577 * immediate change in the kernel's routing
578 * tables. The message is also handed to anyone
579 * listening on a raw socket (e.g. the routing
580 * daemon for use in updating its tables).
581 */
582 icmpgw.sin_addr = ip->ip_src;
583 icmpdst.sin_addr = icp->icmp_gwaddr;
584 #ifdef ICMPPRINTFS
585 if (icmpprintfs)
586 printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
587 icp->icmp_gwaddr);
588 #endif
589 icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
590 rt = NULL;
591 rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
592 (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
593 sintosa(&icmpgw), (struct rtentry **)&rt);
594 if (rt != NULL && icmp_redirtimeout != 0) {
595 i = rt_timer_add(rt, icmp_redirect_timeout,
596 icmp_redirect_timeout_q);
597 if (i)
598 log(LOG_ERR, "ICMP: redirect failed to "
599 "register timeout for route to %x, "
600 "code %d\n",
601 icp->icmp_ip.ip_dst.s_addr, i);
602 }
603 if (rt != NULL)
604 rtfree(rt);
605
606 pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
607 #ifdef IPSEC
608 key_sa_routechange((struct sockaddr *)&icmpsrc);
609 #endif
610 break;
611
612 /*
613 * No kernel processing for the following;
614 * just fall through to send to raw listener.
615 */
616 case ICMP_ECHOREPLY:
617 case ICMP_ROUTERADVERT:
618 case ICMP_ROUTERSOLICIT:
619 case ICMP_TSTAMPREPLY:
620 case ICMP_IREQREPLY:
621 case ICMP_MASKREPLY:
622 default:
623 break;
624 }
625
626 raw:
627 rip_input(m, hlen, proto);
628 return;
629
630 freeit:
631 m_freem(m);
632 return;
633 }
634
635 /*
636 * Reflect the ip packet back to the source
637 */
638 void
639 icmp_reflect(m)
640 struct mbuf *m;
641 {
642 struct ip *ip = mtod(m, struct ip *);
643 struct in_ifaddr *ia;
644 struct ifaddr *ifa;
645 struct sockaddr_in *sin = 0;
646 struct in_addr t;
647 struct mbuf *opts = 0;
648 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
649
650 if (!in_canforward(ip->ip_src) &&
651 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
652 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
653 m_freem(m); /* Bad return address */
654 goto done; /* ip_output() will check for broadcast */
655 }
656 t = ip->ip_dst;
657 ip->ip_dst = ip->ip_src;
658 /*
659 * If the incoming packet was addressed directly to us, use
660 * dst as the src for the reply. Otherwise (broadcast or
661 * anonymous), use an address which corresponds to the
662 * incoming interface, with a preference for the address which
663 * corresponds to the route to the destination of the ICMP.
664 */
665
666 /* Look for packet addressed to us */
667 INADDR_TO_IA(t, ia);
668
669 /* look for packet sent to broadcast address */
670 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
671 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
672 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
673 if (ifa->ifa_addr->sa_family != AF_INET)
674 continue;
675 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
676 ia = ifatoia(ifa);
677 break;
678 }
679 }
680 }
681
682 if (ia)
683 sin = &ia->ia_addr;
684
685 icmpdst.sin_addr = t;
686
687 /* if the packet is addressed somewhere else, compute the
688 source address for packets routed back to the source, and
689 use that, if it's an address on the interface which
690 received the packet */
691 if (sin == (struct sockaddr_in *)0) {
692 struct sockaddr_in sin_dst;
693 struct route icmproute;
694 int errornum;
695
696 sin_dst.sin_family = AF_INET;
697 sin_dst.sin_len = sizeof(struct sockaddr_in);
698 sin_dst.sin_addr = ip->ip_dst;
699 bzero(&icmproute, sizeof(icmproute));
700 errornum = 0;
701 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
702 /* errornum is never used */
703 if (icmproute.ro_rt)
704 RTFREE(icmproute.ro_rt);
705 /* check to make sure sin is a source address on rcvif */
706 if (sin) {
707 t = sin->sin_addr;
708 sin = (struct sockaddr_in *)0;
709 INADDR_TO_IA(t, ia);
710 while (ia) {
711 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
712 sin = &ia->ia_addr;
713 break;
714 }
715 NEXT_IA_WITH_SAME_ADDR(ia);
716 }
717 }
718 }
719
720 /* if it was not addressed to us, but the route doesn't go out
721 the source interface, pick an address on the source
722 interface. This can happen when routing is asymmetric, or
723 when the incoming packet was encapsulated */
724 if (sin == (struct sockaddr_in *)0) {
725 for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
726 ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
727 if (ifa->ifa_addr->sa_family != AF_INET)
728 continue;
729 sin = &(ifatoia(ifa)->ia_addr);
730 break;
731 }
732 }
733
734 /*
735 * The following happens if the packet was not addressed to us,
736 * and was received on an interface with no IP address:
737 * We find the first AF_INET address on the first non-loopback
738 * interface.
739 */
740 if (sin == (struct sockaddr_in *)0)
741 for (ia = in_ifaddr.tqh_first; ia != NULL;
742 ia = ia->ia_list.tqe_next) {
743 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
744 continue;
745 sin = &ia->ia_addr;
746 break;
747 }
748
749 /*
750 * If we still didn't find an address, punt. We could have an
751 * interface up (and receiving packets) with no address.
752 */
753 if (sin == (struct sockaddr_in *)0) {
754 m_freem(m);
755 goto done;
756 }
757
758 ip->ip_src = sin->sin_addr;
759 ip->ip_ttl = MAXTTL;
760
761 if (optlen > 0) {
762 u_char *cp;
763 int opt, cnt;
764 u_int len;
765
766 /*
767 * Retrieve any source routing from the incoming packet;
768 * add on any record-route or timestamp options.
769 */
770 cp = (u_char *) (ip + 1);
771 if ((opts = ip_srcroute()) == 0 &&
772 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
773 opts->m_len = sizeof(struct in_addr);
774 *mtod(opts, struct in_addr *) = zeroin_addr;
775 }
776 if (opts) {
777 #ifdef ICMPPRINTFS
778 if (icmpprintfs)
779 printf("icmp_reflect optlen %d rt %d => ",
780 optlen, opts->m_len);
781 #endif
782 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
783 opt = cp[IPOPT_OPTVAL];
784 if (opt == IPOPT_EOL)
785 break;
786 if (opt == IPOPT_NOP)
787 len = 1;
788 else {
789 if (cnt < IPOPT_OLEN + sizeof(*cp))
790 break;
791 len = cp[IPOPT_OLEN];
792 if (len < IPOPT_OLEN + sizeof(*cp) ||
793 len > cnt)
794 break;
795 }
796 /*
797 * Should check for overflow, but it "can't happen"
798 */
799 if (opt == IPOPT_RR || opt == IPOPT_TS ||
800 opt == IPOPT_SECURITY) {
801 bcopy((caddr_t)cp,
802 mtod(opts, caddr_t) + opts->m_len, len);
803 opts->m_len += len;
804 }
805 }
806 /* Terminate & pad, if necessary */
807 if ((cnt = opts->m_len % 4) != 0) {
808 for (; cnt < 4; cnt++) {
809 *(mtod(opts, caddr_t) + opts->m_len) =
810 IPOPT_EOL;
811 opts->m_len++;
812 }
813 }
814 #ifdef ICMPPRINTFS
815 if (icmpprintfs)
816 printf("%d\n", opts->m_len);
817 #endif
818 }
819 /*
820 * Now strip out original options by copying rest of first
821 * mbuf's data back, and adjust the IP length.
822 */
823 ip->ip_len -= optlen;
824 ip->ip_hl = sizeof(struct ip) >> 2;
825 m->m_len -= optlen;
826 if (m->m_flags & M_PKTHDR)
827 m->m_pkthdr.len -= optlen;
828 optlen += sizeof(struct ip);
829 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
830 (unsigned)(m->m_len - sizeof(struct ip)));
831 }
832 m->m_flags &= ~(M_BCAST|M_MCAST);
833 icmp_send(m, opts);
834 done:
835 if (opts)
836 (void)m_free(opts);
837 }
838
839 /*
840 * Send an icmp packet back to the ip level,
841 * after supplying a checksum.
842 */
843 void
844 icmp_send(m, opts)
845 struct mbuf *m;
846 struct mbuf *opts;
847 {
848 struct ip *ip = mtod(m, struct ip *);
849 int hlen;
850 struct icmp *icp;
851
852 hlen = ip->ip_hl << 2;
853 m->m_data += hlen;
854 m->m_len -= hlen;
855 icp = mtod(m, struct icmp *);
856 icp->icmp_cksum = 0;
857 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
858 m->m_data -= hlen;
859 m->m_len += hlen;
860 #ifdef ICMPPRINTFS
861 if (icmpprintfs)
862 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
863 #endif
864 #ifdef IPSEC
865 /* Don't lookup socket */
866 (void)ipsec_setsocket(m, NULL);
867 #endif
868 (void) ip_output(m, opts, NULL, 0, NULL);
869 }
870
871 n_time
872 iptime()
873 {
874 struct timeval atv;
875 u_long t;
876
877 microtime(&atv);
878 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
879 return (htonl(t));
880 }
881
882 int
883 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
884 int *name;
885 u_int namelen;
886 void *oldp;
887 size_t *oldlenp;
888 void *newp;
889 size_t newlen;
890 {
891 int arg, error;
892
893 /* All sysctl names at this level are terminal. */
894 if (namelen != 1)
895 return (ENOTDIR);
896
897 switch (name[0])
898 {
899 case ICMPCTL_MASKREPL:
900 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
901 break;
902 case ICMPCTL_RETURNDATABYTES:
903 arg = icmpreturndatabytes;
904 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
905 if (error)
906 break;
907 if ((arg >= 8) || (arg <= 512))
908 icmpreturndatabytes = arg;
909 else
910 error = EINVAL;
911 break;
912 case ICMPCTL_ERRPPSLIMIT:
913 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
914 break;
915 case ICMPCTL_REDIRACCEPT:
916 error = sysctl_int(oldp, oldlenp, newp, newlen,
917 &icmp_rediraccept);
918 break;
919 case ICMPCTL_REDIRTIMEOUT:
920 error = sysctl_int(oldp, oldlenp, newp, newlen,
921 &icmp_redirtimeout);
922 if (icmp_redirect_timeout_q != NULL) {
923 if (icmp_redirtimeout == 0) {
924 rt_timer_queue_destroy(icmp_redirect_timeout_q,
925 TRUE);
926 icmp_redirect_timeout_q = NULL;
927 } else {
928 rt_timer_queue_change(icmp_redirect_timeout_q,
929 icmp_redirtimeout);
930 }
931 } else if (icmp_redirtimeout > 0) {
932 icmp_redirect_timeout_q =
933 rt_timer_queue_create(icmp_redirtimeout);
934 }
935 return (error);
936
937 break;
938 default:
939 error = ENOPROTOOPT;
940 break;
941 }
942 return error;
943 }
944
945 void
946 icmp_mtudisc(icp, faddr)
947 struct icmp *icp;
948 struct in_addr faddr;
949 {
950 struct icmp_mtudisc_callback *mc;
951 struct sockaddr *dst = sintosa(&icmpsrc);
952 struct rtentry *rt;
953 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
954 int error;
955
956 /* Table of common MTUs: */
957
958 static const u_int mtu_table[] = { 65535, 65280, 32000, 17914, 9180,
959 8166, 4352, 2002, 1492, 1006, 508, 296, 68, 0};
960
961 rt = rtalloc1(dst, 1);
962 if (rt == 0)
963 return;
964
965 /* If we didn't get a host route, allocate one */
966
967 if ((rt->rt_flags & RTF_HOST) == 0) {
968 struct rtentry *nrt;
969
970 error = rtrequest((int) RTM_ADD, dst,
971 (struct sockaddr *) rt->rt_gateway,
972 (struct sockaddr *) 0,
973 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
974 if (error) {
975 rtfree(rt);
976 return;
977 }
978 nrt->rt_rmx = rt->rt_rmx;
979 rtfree(rt);
980 rt = nrt;
981 }
982 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
983 if (error) {
984 rtfree(rt);
985 return;
986 }
987
988 if (mtu == 0) {
989 int i = 0;
990
991 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
992 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
993 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
994 mtu -= (icp->icmp_ip.ip_hl << 2);
995
996 /* If we still can't guess a value, try the route */
997
998 if (mtu == 0) {
999 mtu = rt->rt_rmx.rmx_mtu;
1000
1001 /* If no route mtu, default to the interface mtu */
1002
1003 if (mtu == 0)
1004 mtu = rt->rt_ifp->if_mtu;
1005 }
1006
1007 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1008 if (mtu > mtu_table[i]) {
1009 mtu = mtu_table[i];
1010 break;
1011 }
1012 }
1013
1014 /*
1015 * XXX: RTV_MTU is overloaded, since the admin can set it
1016 * to turn off PMTU for a route, and the kernel can
1017 * set it to indicate a serious problem with PMTU
1018 * on a route. We should be using a separate flag
1019 * for the kernel to indicate this.
1020 */
1021
1022 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1023 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1024 rt->rt_rmx.rmx_locks |= RTV_MTU;
1025 else if (rt->rt_rmx.rmx_mtu > mtu ||
1026 rt->rt_rmx.rmx_mtu == 0) {
1027 icmpstat.icps_pmtuchg++;
1028 rt->rt_rmx.rmx_mtu = mtu;
1029 }
1030 }
1031
1032 if (rt)
1033 rtfree(rt);
1034
1035 /*
1036 * Notify protocols that the MTU for this destination
1037 * has changed.
1038 */
1039 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1040 mc = LIST_NEXT(mc, mc_list))
1041 (*mc->mc_func)(faddr);
1042 }
1043
/*
 * Return the neighbouring MTU plateau of the value MTU, using the table
 * below (descending, terminated by 0).
 *
 *	dir < 0		the next larger plateau is returned;
 *	otherwise	the next smaller plateau is returned (a value that
 *			lies between two plateaus is rounded down to the
 *			lower one).
 *
 * 0 is returned when there is no plateau in the requested direction.
 * MTU is compared as an unsigned quantity.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const unsigned int plateau[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const unsigned int nplateau = sizeof(plateau) / sizeof(plateau[0]);
	const unsigned int cur = (unsigned int)mtu;
	unsigned int idx = 0;

	/* Find the first plateau that does not exceed the current value. */
	while (idx < nplateau && cur < plateau[idx])
		idx++;

	if (dir < 0)
		return (idx == 0) ? 0 : plateau[idx - 1];

	if (plateau[idx] == 0)
		return 0;
	if (cur > plateau[idx])
		return plateau[idx];
	return plateau[idx + 1];
}
1081
1082 static void
1083 icmp_mtudisc_timeout(rt, r)
1084 struct rtentry *rt;
1085 struct rttimer *r;
1086 {
1087 if (rt == NULL)
1088 panic("icmp_mtudisc_timeout: bad route to timeout");
1089 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1090 (RTF_DYNAMIC | RTF_HOST)) {
1091 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1092 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1093 } else {
1094 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1095 rt->rt_rmx.rmx_mtu = 0;
1096 }
1097 }
1098 }
1099
1100 static void
1101 icmp_redirect_timeout(rt, r)
1102 struct rtentry *rt;
1103 struct rttimer *r;
1104 {
1105 if (rt == NULL)
1106 panic("icmp_redirect_timeout: bad route to timeout");
1107 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1108 (RTF_DYNAMIC | RTF_HOST)) {
1109 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1110 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1111 }
1112 }
1113
1114 /*
1115 * Perform rate limit check.
1116 * Returns 0 if it is okay to send the icmp packet.
1117 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1118 * limitation.
1119 *
1120 * XXX per-destination/type check necessary?
1121 */
1122 static int
1123 icmp_ratelimit(dst, type, code)
1124 const struct in_addr *dst;
1125 const int type; /* not used at this moment */
1126 const int code; /* not used at this moment */
1127 {
1128
1129 /* PPS limit */
1130 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1131 icmperrppslim)) {
1132 /* The packet is subject to rate limit */
1133 return 1;
1134 }
1135
1136 /*okay to send*/
1137 return 0;
1138 }
1139