ip_icmp.c revision 1.65 1 /* $NetBSD: ip_icmp.c,v 1.65 2001/11/04 20:55:27 matt Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * This code is derived from software contributed to The NetBSD Foundation
37 * by Public Access Networks Corporation ("Panix"). It was developed under
38 * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 *
40 * This code is derived from software contributed to The NetBSD Foundation
41 * by Jason R. Thorpe of Zembu Labs, Inc.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the NetBSD
54 * Foundation, Inc. and its contributors.
55 * 4. Neither the name of The NetBSD Foundation nor the names of its
56 * contributors may be used to endorse or promote products derived
57 * from this software without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
61 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
63 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
64 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
65 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
66 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
67 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
68 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
69 * POSSIBILITY OF SUCH DAMAGE.
70 */
71
72 /*
73 * Copyright (c) 1982, 1986, 1988, 1993
74 * The Regents of the University of California. All rights reserved.
75 *
76 * Redistribution and use in source and binary forms, with or without
77 * modification, are permitted provided that the following conditions
78 * are met:
79 * 1. Redistributions of source code must retain the above copyright
80 * notice, this list of conditions and the following disclaimer.
81 * 2. Redistributions in binary form must reproduce the above copyright
82 * notice, this list of conditions and the following disclaimer in the
83 * documentation and/or other materials provided with the distribution.
84 * 3. All advertising materials mentioning features or use of this software
85 * must display the following acknowledgement:
86 * This product includes software developed by the University of
87 * California, Berkeley and its contributors.
88 * 4. Neither the name of the University nor the names of its contributors
89 * may be used to endorse or promote products derived from this software
90 * without specific prior written permission.
91 *
92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
102 * SUCH DAMAGE.
103 *
104 * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
105 */
106
107 #include "opt_ipsec.h"
108
109 #include <sys/param.h>
110 #include <sys/systm.h>
111 #include <sys/malloc.h>
112 #include <sys/mbuf.h>
113 #include <sys/protosw.h>
114 #include <sys/socket.h>
115 #include <sys/time.h>
116 #include <sys/kernel.h>
117 #include <sys/syslog.h>
118 #include <sys/sysctl.h>
119
120 #include <net/if.h>
121 #include <net/route.h>
122
123 #include <netinet/in.h>
124 #include <netinet/in_systm.h>
125 #include <netinet/in_var.h>
126 #include <netinet/ip.h>
127 #include <netinet/ip_icmp.h>
128 #include <netinet/ip_var.h>
129 #include <netinet/in_pcb.h>
130 #include <netinet/icmp_var.h>
131
132 #ifdef IPSEC
133 #include <netinet6/ipsec.h>
134 #include <netkey/key.h>
135 #endif
136
137 #include <machine/stdarg.h>
138
139 /*
140 * ICMP routines: error generation, receive packet processing, and
141 * routines to turnaround packets back to the originator, and
142 * host table maintenance routines.
143 */
144
/*
 * Run-time tunables.
 *
 * icmpmaskrepl: when zero, icmp_input() ignores ICMP_MASKREQ messages;
 * exported through the ICMPCTL_MASKREPL sysctl.
 */
int icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
/* When non-zero, the debugging printf()s in this file are enabled. */
int icmpprintfs = 0;
#endif
/*
 * Upper bound on how many bytes of the offending datagram, beyond its
 * IP header, icmp_error() quotes back in an error message; exported
 * through the ICMPCTL_RETURNDATABYTES sysctl.
 */
int icmpreturndatabytes = 8;
150
/*
 * List of callbacks to notify when Path MTU changes are made.
 *
 * Entries are added by icmp_mtudisc_callback_register() and every
 * registered function is invoked at the end of icmp_mtudisc().
 */
struct icmp_mtudisc_callback {
	LIST_ENTRY(icmp_mtudisc_callback) mc_list;	/* list linkage */
	void (*mc_func) __P((struct in_addr));	/* function to notify */
};

/* Head of the callback list; new entries are inserted at the head. */
LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
161
/*
 * ip_next_mtu() is deliberately left with external linkage for now;
 * the static prototype is kept under #if 0.
 */
#if 0
static int ip_next_mtu __P((int, int));
#else
/*static*/ int ip_next_mtu __P((int, int));
#endif

/* Rate-limit state consumed by icmp_ratelimit() via ppsratecheck(). */
extern int icmperrppslim;		/* limit; defined elsewhere */
static int icmperrpps_count = 0;
static struct timeval icmperrppslim_last;
/* When zero, icmp_input() drops ICMP_REDIRECT messages. */
static int icmp_rediraccept = 1;
/*
 * Timeout handed to the redirect timer queue; zero means no queue is
 * maintained (see icmp_init() and the ICMPCTL_REDIRTIMEOUT sysctl).
 */
static int icmp_redirtimeout = 0;
/* Timer queue for redirect-created routes; NULL while not in use. */
static struct rttimer_queue *icmp_redirect_timeout_q = NULL;

/* Route-timer expiry handlers. */
static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));

static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
179
180
181 void
182 icmp_init()
183 {
184 /*
185 * This is only useful if the user initializes redirtimeout to
186 * something other than zero.
187 */
188 if (icmp_redirtimeout != 0) {
189 icmp_redirect_timeout_q =
190 rt_timer_queue_create(icmp_redirtimeout);
191 }
192 }
193
194 /*
195 * Register a Path MTU Discovery callback.
196 */
197 void
198 icmp_mtudisc_callback_register(func)
199 void (*func) __P((struct in_addr));
200 {
201 struct icmp_mtudisc_callback *mc;
202
203 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
204 mc = LIST_NEXT(mc, mc_list)) {
205 if (mc->mc_func == func)
206 return;
207 }
208
209 mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
210 if (mc == NULL)
211 panic("icmp_mtudisc_callback_register");
212
213 mc->mc_func = func;
214 LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
215 }
216
217 /*
218 * Generate an error packet of type error
219 * in response to bad packet ip.
220 */
221 void
222 icmp_error(n, type, code, dest, destifp)
223 struct mbuf *n;
224 int type, code;
225 n_long dest;
226 struct ifnet *destifp;
227 {
228 struct ip *oip = mtod(n, struct ip *), *nip;
229 unsigned oiplen = oip->ip_hl << 2;
230 struct icmp *icp;
231 struct mbuf *m;
232 unsigned icmplen, mblen;
233
234 #ifdef ICMPPRINTFS
235 if (icmpprintfs)
236 printf("icmp_error(%x, %d, %d)\n", oip, type, code);
237 #endif
238 if (type != ICMP_REDIRECT)
239 icmpstat.icps_error++;
240 /*
241 * Don't send error if the original packet was encrypted.
242 * Don't send error if not the first fragment of message.
243 * Don't error if the old packet protocol was ICMP
244 * error message, only known informational types.
245 */
246 if (n->m_flags & M_DECRYPTED)
247 goto freeit;
248 if (oip->ip_off &~ (IP_MF|IP_DF))
249 goto freeit;
250 if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
251 n->m_len >= oiplen + ICMP_MINLEN &&
252 !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
253 icmpstat.icps_oldicmp++;
254 goto freeit;
255 }
256 /* Don't send error in response to a multicast or broadcast packet */
257 if (n->m_flags & (M_BCAST|M_MCAST))
258 goto freeit;
259
260 /*
261 * First, do a rate limitation check.
262 */
263 if (icmp_ratelimit(&oip->ip_src, type, code)) {
264 /* XXX stat */
265 goto freeit;
266 }
267
268 /*
269 * Now, formulate icmp message
270 */
271 icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
272 /*
273 * Defend against mbuf chains shorter than oip->ip_len:
274 */
275 mblen = 0;
276 for (m = n; m && (mblen < icmplen); m = m->m_next)
277 mblen += m->m_len;
278 icmplen = min(mblen, icmplen);
279
280 /*
281 * As we are not required to return everything we have,
282 * we return whatever we can return at ease.
283 *
284 * Note that ICMP datagrams longer than 576 octets are out of spec
285 * according to RFC1812; the limit on icmpreturndatabytes below in
286 * icmp_sysctl will keep things below that limit.
287 */
288
289 KASSERT(ICMP_MINLEN <= MCLBYTES);
290
291 if (icmplen + ICMP_MINLEN > MCLBYTES)
292 icmplen = MCLBYTES - ICMP_MINLEN;
293
294 m = m_gethdr(M_DONTWAIT, MT_HEADER);
295 if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
296 MCLGET(m, M_DONTWAIT);
297 if ((m->m_flags & M_EXT) == 0) {
298 m_freem(m);
299 m = NULL;
300 }
301 }
302 if (m == NULL)
303 goto freeit;
304 m->m_len = icmplen + ICMP_MINLEN;
305 if ((m->m_flags & M_EXT) == 0)
306 MH_ALIGN(m, m->m_len);
307 icp = mtod(m, struct icmp *);
308 if ((u_int)type > ICMP_MAXTYPE)
309 panic("icmp_error");
310 icmpstat.icps_outhist[type]++;
311 icp->icmp_type = type;
312 if (type == ICMP_REDIRECT)
313 icp->icmp_gwaddr.s_addr = dest;
314 else {
315 icp->icmp_void = 0;
316 /*
317 * The following assignments assume an overlay with the
318 * zeroed icmp_void field.
319 */
320 if (type == ICMP_PARAMPROB) {
321 icp->icmp_pptr = code;
322 code = 0;
323 } else if (type == ICMP_UNREACH &&
324 code == ICMP_UNREACH_NEEDFRAG && destifp)
325 icp->icmp_nextmtu = htons(destifp->if_mtu);
326 }
327
328 HTONS(oip->ip_off);
329 HTONS(oip->ip_len);
330 icp->icmp_code = code;
331 m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
332 nip = &icp->icmp_ip;
333
334 /*
335 * Now, copy old ip header (without options)
336 * in front of icmp message.
337 */
338 if (m->m_data - sizeof(struct ip) < m->m_pktdat)
339 panic("icmp len");
340 m->m_data -= sizeof(struct ip);
341 m->m_len += sizeof(struct ip);
342 m->m_pkthdr.len = m->m_len;
343 m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
344 nip = mtod(m, struct ip *);
345 /* ip_v set in ip_output */
346 nip->ip_hl = sizeof(struct ip) >> 2;
347 nip->ip_tos = 0;
348 nip->ip_len = m->m_len;
349 /* ip_id set in ip_output */
350 nip->ip_off = 0;
351 /* ip_ttl set in icmp_reflect */
352 nip->ip_p = IPPROTO_ICMP;
353 nip->ip_src = oip->ip_src;
354 nip->ip_dst = oip->ip_dst;
355 icmp_reflect(m);
356
357 freeit:
358 m_freem(n);
359 }
360
/*
 * Scratch socket addresses reused across calls.  Only sin_addr is
 * rewritten at run time:
 *   icmpsrc - set by icmp_input() to the destination of the quoted
 *             datagram; also read by icmp_mtudisc().
 *   icmpdst - set by icmp_input() and icmp_reflect().
 *   icmpgw  - set by icmp_input() to the sender of a redirect.
 * icmpmask is not referenced by the code visible in this file.
 */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
struct sockaddr_in icmpmask = { 8, 0 };
365
/*
 * Process a received ICMP message.
 *
 * Arguments: m is the packet, starting at its IP header.  Two further
 * int arguments are read from the variable argument list: hlen, the
 * length of the IP header in bytes, and proto, which is only passed
 * through to rip_input().
 *
 * Disposition of m: it is either freed here, handed to icmp_reflect()
 * (requests that are answered), or handed to rip_input() (everything
 * else, after any kernel processing).
 */
void
#if __STDC__
icmp_input(struct mbuf *m, ...)
#else
icmp_input(m, va_alist)
	struct mbuf *m;
	va_dcl
#endif
{
	int proto;
	struct icmp *icp;
	struct ip *ip = mtod(m, struct ip *);
	int icmplen;
	int i;
	struct in_ifaddr *ia;
	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
	int code;
	int hlen;
	va_list ap;
	struct rtentry *rt;

	va_start(ap, m);
	hlen = va_arg(ap, int);
	proto = va_arg(ap, int);
	va_end(ap);

	/*
	 * Locate icmp structure in mbuf, and check
	 * that not corrupted and of at least minimum length.
	 */
	icmplen = ip->ip_len - hlen;
#ifdef ICMPPRINTFS
	if (icmpprintfs)
		printf("icmp_input from %x to %x, len %d\n",
		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
		    icmplen);
#endif
	if (icmplen < ICMP_MINLEN) {
		icmpstat.icps_tooshort++;
		goto freeit;
	}
	/* Make the headers examined below contiguous in the first mbuf. */
	i = hlen + min(icmplen, ICMP_ADVLENMIN);
	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
		/* m is NULL here; presumably m_pullup() freed the chain. */
		icmpstat.icps_tooshort++;
		return;
	}
	ip = mtod(m, struct ip *);
	/* Step over the IP header so the checksum covers ICMP only. */
	m->m_len -= hlen;
	m->m_data += hlen;
	icp = mtod(m, struct icmp *);
	if (in_cksum(m, icmplen)) {
		icmpstat.icps_checksum++;
		goto freeit;
	}
	/* Put the IP header back in view; icp stays valid. */
	m->m_len += hlen;
	m->m_data -= hlen;

#ifdef ICMPPRINTFS
	/*
	 * Message type specific processing.
	 */
	if (icmpprintfs)
		printf("icmp_input, type %d code %d\n", icp->icmp_type,
		    icp->icmp_code);
#endif
	/* Unknown types get no kernel processing, only raw delivery. */
	if (icp->icmp_type > ICMP_MAXTYPE)
		goto raw;
	icmpstat.icps_inhist[icp->icmp_type]++;
	code = icp->icmp_code;
	switch (icp->icmp_type) {

	/*
	 * Error messages: translate the ICMP code into a PRC_* value
	 * and jump to the shared "deliver" label.
	 */
	case ICMP_UNREACH:
		switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_PROTOCOL:
			case ICMP_UNREACH_PORT:
			case ICMP_UNREACH_SRCFAIL:
				code += PRC_UNREACH_NET;
				break;

			case ICMP_UNREACH_NEEDFRAG:
				code = PRC_MSGSIZE;
				break;

			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_TOSNET:
				code = PRC_UNREACH_NET;
				break;

			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_TOSHOST:
				code = PRC_UNREACH_HOST;
				break;

			default:
				goto badcode;
		}
		goto deliver;

	case ICMP_TIMXCEED:
		if (code > 1)
			goto badcode;
		code += PRC_TIMXCEED_INTRANS;
		goto deliver;

	case ICMP_PARAMPROB:
		if (code > 1)
			goto badcode;
		code = PRC_PARAMPROB;
		goto deliver;

	case ICMP_SOURCEQUENCH:
		if (code)
			goto badcode;
		code = PRC_QUENCH;
		goto deliver;

	deliver:
		/*
		 * Problem with datagram; advise higher level routines.
		 */
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat.icps_badlen++;
			goto freeit;
		}
		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
			goto badcode;
		/* The quoted header's length is used in host order from here. */
		NTOHS(icp->icmp_ip.ip_len);
#ifdef ICMPPRINTFS
		if (icmpprintfs)
			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
		/* Notify the protocol named in the quoted header, if it cares. */
		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
		if (ctlfunc)
			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
			    &icp->icmp_ip);
		break;

	badcode:
		icmpstat.icps_badcode++;
		break;

	/*
	 * Requests: rewrite the message in place into its reply and
	 * jump to "reflect".
	 */
	case ICMP_ECHO:
		icp->icmp_type = ICMP_ECHOREPLY;
		goto reflect;

	case ICMP_TSTAMP:
		if (icmplen < ICMP_TSLEN) {
			icmpstat.icps_badlen++;
			break;
		}
		icp->icmp_type = ICMP_TSTAMPREPLY;
		icp->icmp_rtime = iptime();
		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
		goto reflect;

	case ICMP_MASKREQ:
		/* Mask replies are off unless enabled by the tunable. */
		if (icmpmaskrepl == 0)
			break;
		/*
		 * We are not able to respond with all ones broadcast
		 * unless we receive it over a point-to-point interface.
		 */
		if (icmplen < ICMP_MASKLEN) {
			icmpstat.icps_badlen++;
			break;
		}
		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
		    in_nullhost(ip->ip_dst))
			icmpdst.sin_addr = ip->ip_src;
		else
			icmpdst.sin_addr = ip->ip_dst;
		/* Find an address on the receiving interface to answer for. */
		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
		    m->m_pkthdr.rcvif));
		if (ia == 0)
			break;
		icp->icmp_type = ICMP_MASKREPLY;
		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
		/* A null source is replaced so the reply has somewhere to go. */
		if (in_nullhost(ip->ip_src)) {
			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
				ip->ip_src = ia->ia_broadaddr.sin_addr;
			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
				ip->ip_src = ia->ia_dstaddr.sin_addr;
		}
reflect:
		icmpstat.icps_reflect++;
		icmpstat.icps_outhist[icp->icmp_type]++;
		/* m is handed to icmp_reflect(); nothing goes to raw listeners. */
		icmp_reflect(m);
		return;

	case ICMP_REDIRECT:
		if (code > 3)
			goto badcode;
		/* Redirects disabled: drop without raw delivery. */
		if (icmp_rediraccept == 0)
			goto freeit;
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat.icps_badlen++;
			break;
		}
		/*
		 * Short circuit routing redirects to force
		 * immediate change in the kernel's routing
		 * tables.  The message is also handed to anyone
		 * listening on a raw socket (e.g. the routing
		 * daemon for use in updating its tables).
		 */
		icmpgw.sin_addr = ip->ip_src;
		icmpdst.sin_addr = icp->icmp_gwaddr;
#ifdef ICMPPRINTFS
		if (icmpprintfs)
			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
			    icp->icmp_gwaddr);
#endif
		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
		rt = NULL;
		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
		    sintosa(&icmpgw), (struct rtentry **)&rt);
		/* Arm an expiry timer on the route if a timeout is configured. */
		if (rt != NULL && icmp_redirtimeout != 0) {
			i = rt_timer_add(rt, icmp_redirect_timeout,
			    icmp_redirect_timeout_q);
			if (i)
				log(LOG_ERR, "ICMP:  redirect failed to "
				    "register timeout for route to %x, "
				    "code %d\n",
				    icp->icmp_ip.ip_dst.s_addr, i);
		}
		/* Release the route handed back by rtredirect(), if any. */
		if (rt != NULL)
			rtfree(rt);

		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
#ifdef IPSEC
		key_sa_routechange((struct sockaddr *)&icmpsrc);
#endif
		break;

	/*
	 * No kernel processing for the following;
	 * just fall through to send to raw listener.
	 */
	case ICMP_ECHOREPLY:
	case ICMP_ROUTERADVERT:
	case ICMP_ROUTERSOLICIT:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQREPLY:
	case ICMP_MASKREPLY:
	default:
		break;
	}

raw:
	/* Every path that "break"s out of the switch ends up here. */
	rip_input(m, hlen, proto);
	return;

freeit:
	m_freem(m);
	return;
}
634
635 /*
636 * Reflect the ip packet back to the source
637 */
638 void
639 icmp_reflect(m)
640 struct mbuf *m;
641 {
642 struct ip *ip = mtod(m, struct ip *);
643 struct in_ifaddr *ia;
644 struct ifaddr *ifa;
645 struct sockaddr_in *sin = 0;
646 struct in_addr t;
647 struct mbuf *opts = 0;
648 int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
649
650 if (!in_canforward(ip->ip_src) &&
651 ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
652 htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
653 m_freem(m); /* Bad return address */
654 goto done; /* ip_output() will check for broadcast */
655 }
656 t = ip->ip_dst;
657 ip->ip_dst = ip->ip_src;
658 /*
659 * If the incoming packet was addressed directly to us, use
660 * dst as the src for the reply. Otherwise (broadcast or
661 * anonymous), use an address which corresponds to the
662 * incoming interface, with a preference for the address which
663 * corresponds to the route to the destination of the ICMP.
664 */
665
666 /* Look for packet addressed to us */
667 INADDR_TO_IA(t, ia);
668
669 /* look for packet sent to broadcast address */
670 if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
671 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
672 if (ifa->ifa_addr->sa_family != AF_INET)
673 continue;
674 if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
675 ia = ifatoia(ifa);
676 break;
677 }
678 }
679 }
680
681 if (ia)
682 sin = &ia->ia_addr;
683
684 icmpdst.sin_addr = t;
685
686 /* if the packet is addressed somewhere else, compute the
687 source address for packets routed back to the source, and
688 use that, if it's an address on the interface which
689 received the packet */
690 if (sin == (struct sockaddr_in *)0) {
691 struct sockaddr_in sin_dst;
692 struct route icmproute;
693 int errornum;
694
695 sin_dst.sin_family = AF_INET;
696 sin_dst.sin_len = sizeof(struct sockaddr_in);
697 sin_dst.sin_addr = ip->ip_dst;
698 bzero(&icmproute, sizeof(icmproute));
699 errornum = 0;
700 sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
701 /* errornum is never used */
702 if (icmproute.ro_rt)
703 RTFREE(icmproute.ro_rt);
704 /* check to make sure sin is a source address on rcvif */
705 if (sin) {
706 t = sin->sin_addr;
707 sin = (struct sockaddr_in *)0;
708 INADDR_TO_IA(t, ia);
709 while (ia) {
710 if (ia->ia_ifp == m->m_pkthdr.rcvif) {
711 sin = &ia->ia_addr;
712 break;
713 }
714 NEXT_IA_WITH_SAME_ADDR(ia);
715 }
716 }
717 }
718
719 /* if it was not addressed to us, but the route doesn't go out
720 the source interface, pick an address on the source
721 interface. This can happen when routing is asymmetric, or
722 when the incoming packet was encapsulated */
723 if (sin == (struct sockaddr_in *)0) {
724 TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
725 if (ifa->ifa_addr->sa_family != AF_INET)
726 continue;
727 sin = &(ifatoia(ifa)->ia_addr);
728 break;
729 }
730 }
731
732 /*
733 * The following happens if the packet was not addressed to us,
734 * and was received on an interface with no IP address:
735 * We find the first AF_INET address on the first non-loopback
736 * interface.
737 */
738 if (sin == (struct sockaddr_in *)0)
739 TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
740 if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
741 continue;
742 sin = &ia->ia_addr;
743 break;
744 }
745
746 /*
747 * If we still didn't find an address, punt. We could have an
748 * interface up (and receiving packets) with no address.
749 */
750 if (sin == (struct sockaddr_in *)0) {
751 m_freem(m);
752 goto done;
753 }
754
755 ip->ip_src = sin->sin_addr;
756 ip->ip_ttl = MAXTTL;
757
758 if (optlen > 0) {
759 u_char *cp;
760 int opt, cnt;
761 u_int len;
762
763 /*
764 * Retrieve any source routing from the incoming packet;
765 * add on any record-route or timestamp options.
766 */
767 cp = (u_char *) (ip + 1);
768 if ((opts = ip_srcroute()) == 0 &&
769 (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
770 opts->m_len = sizeof(struct in_addr);
771 *mtod(opts, struct in_addr *) = zeroin_addr;
772 }
773 if (opts) {
774 #ifdef ICMPPRINTFS
775 if (icmpprintfs)
776 printf("icmp_reflect optlen %d rt %d => ",
777 optlen, opts->m_len);
778 #endif
779 for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
780 opt = cp[IPOPT_OPTVAL];
781 if (opt == IPOPT_EOL)
782 break;
783 if (opt == IPOPT_NOP)
784 len = 1;
785 else {
786 if (cnt < IPOPT_OLEN + sizeof(*cp))
787 break;
788 len = cp[IPOPT_OLEN];
789 if (len < IPOPT_OLEN + sizeof(*cp) ||
790 len > cnt)
791 break;
792 }
793 /*
794 * Should check for overflow, but it "can't happen"
795 */
796 if (opt == IPOPT_RR || opt == IPOPT_TS ||
797 opt == IPOPT_SECURITY) {
798 bcopy((caddr_t)cp,
799 mtod(opts, caddr_t) + opts->m_len, len);
800 opts->m_len += len;
801 }
802 }
803 /* Terminate & pad, if necessary */
804 if ((cnt = opts->m_len % 4) != 0) {
805 for (; cnt < 4; cnt++) {
806 *(mtod(opts, caddr_t) + opts->m_len) =
807 IPOPT_EOL;
808 opts->m_len++;
809 }
810 }
811 #ifdef ICMPPRINTFS
812 if (icmpprintfs)
813 printf("%d\n", opts->m_len);
814 #endif
815 }
816 /*
817 * Now strip out original options by copying rest of first
818 * mbuf's data back, and adjust the IP length.
819 */
820 ip->ip_len -= optlen;
821 ip->ip_hl = sizeof(struct ip) >> 2;
822 m->m_len -= optlen;
823 if (m->m_flags & M_PKTHDR)
824 m->m_pkthdr.len -= optlen;
825 optlen += sizeof(struct ip);
826 bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
827 (unsigned)(m->m_len - sizeof(struct ip)));
828 }
829 m->m_flags &= ~(M_BCAST|M_MCAST);
830 icmp_send(m, opts);
831 done:
832 if (opts)
833 (void)m_free(opts);
834 }
835
836 /*
837 * Send an icmp packet back to the ip level,
838 * after supplying a checksum.
839 */
840 void
841 icmp_send(m, opts)
842 struct mbuf *m;
843 struct mbuf *opts;
844 {
845 struct ip *ip = mtod(m, struct ip *);
846 int hlen;
847 struct icmp *icp;
848
849 hlen = ip->ip_hl << 2;
850 m->m_data += hlen;
851 m->m_len -= hlen;
852 icp = mtod(m, struct icmp *);
853 icp->icmp_cksum = 0;
854 icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
855 m->m_data -= hlen;
856 m->m_len += hlen;
857 #ifdef ICMPPRINTFS
858 if (icmpprintfs)
859 printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
860 #endif
861 #ifdef IPSEC
862 /* Don't lookup socket */
863 (void)ipsec_setsocket(m, NULL);
864 #endif
865 (void) ip_output(m, opts, NULL, 0, NULL);
866 }
867
868 n_time
869 iptime()
870 {
871 struct timeval atv;
872 u_long t;
873
874 microtime(&atv);
875 t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
876 return (htonl(t));
877 }
878
879 int
880 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
881 int *name;
882 u_int namelen;
883 void *oldp;
884 size_t *oldlenp;
885 void *newp;
886 size_t newlen;
887 {
888 int arg, error;
889
890 /* All sysctl names at this level are terminal. */
891 if (namelen != 1)
892 return (ENOTDIR);
893
894 switch (name[0])
895 {
896 case ICMPCTL_MASKREPL:
897 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
898 break;
899 case ICMPCTL_RETURNDATABYTES:
900 arg = icmpreturndatabytes;
901 error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
902 if (error)
903 break;
904 if ((arg >= 8) || (arg <= 512))
905 icmpreturndatabytes = arg;
906 else
907 error = EINVAL;
908 break;
909 case ICMPCTL_ERRPPSLIMIT:
910 error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
911 break;
912 case ICMPCTL_REDIRACCEPT:
913 error = sysctl_int(oldp, oldlenp, newp, newlen,
914 &icmp_rediraccept);
915 break;
916 case ICMPCTL_REDIRTIMEOUT:
917 error = sysctl_int(oldp, oldlenp, newp, newlen,
918 &icmp_redirtimeout);
919 if (icmp_redirect_timeout_q != NULL) {
920 if (icmp_redirtimeout == 0) {
921 rt_timer_queue_destroy(icmp_redirect_timeout_q,
922 TRUE);
923 icmp_redirect_timeout_q = NULL;
924 } else {
925 rt_timer_queue_change(icmp_redirect_timeout_q,
926 icmp_redirtimeout);
927 }
928 } else if (icmp_redirtimeout > 0) {
929 icmp_redirect_timeout_q =
930 rt_timer_queue_create(icmp_redirtimeout);
931 }
932 return (error);
933
934 break;
935 default:
936 error = ENOPROTOOPT;
937 break;
938 }
939 return error;
940 }
941
/*
 * Table of common MTUs, in strictly descending order and terminated
 * by a 0 entry.  icmp_mtudisc() and ip_next_mtu() both scan it from
 * the top and depend on that ordering and on the terminator.
 */

static const u_int mtu_table[] = {
	65535, 65280, 32000, 17914, 9180, 8166,
	4352, 2002, 1492, 1006, 508, 296, 68, 0
};
948
949 void
950 icmp_mtudisc(icp, faddr)
951 struct icmp *icp;
952 struct in_addr faddr;
953 {
954 struct icmp_mtudisc_callback *mc;
955 struct sockaddr *dst = sintosa(&icmpsrc);
956 struct rtentry *rt;
957 u_long mtu = ntohs(icp->icmp_nextmtu); /* Why a long? IPv6 */
958 int error;
959
960 rt = rtalloc1(dst, 1);
961 if (rt == 0)
962 return;
963
964 /* If we didn't get a host route, allocate one */
965
966 if ((rt->rt_flags & RTF_HOST) == 0) {
967 struct rtentry *nrt;
968
969 error = rtrequest((int) RTM_ADD, dst,
970 (struct sockaddr *) rt->rt_gateway,
971 (struct sockaddr *) 0,
972 RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
973 if (error) {
974 rtfree(rt);
975 return;
976 }
977 nrt->rt_rmx = rt->rt_rmx;
978 rtfree(rt);
979 rt = nrt;
980 }
981 error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
982 if (error) {
983 rtfree(rt);
984 return;
985 }
986
987 if (mtu == 0) {
988 int i = 0;
989
990 mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
991 /* Some 4.2BSD-based routers incorrectly adjust the ip_len */
992 if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
993 mtu -= (icp->icmp_ip.ip_hl << 2);
994
995 /* If we still can't guess a value, try the route */
996
997 if (mtu == 0) {
998 mtu = rt->rt_rmx.rmx_mtu;
999
1000 /* If no route mtu, default to the interface mtu */
1001
1002 if (mtu == 0)
1003 mtu = rt->rt_ifp->if_mtu;
1004 }
1005
1006 for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
1007 if (mtu > mtu_table[i]) {
1008 mtu = mtu_table[i];
1009 break;
1010 }
1011 }
1012
1013 /*
1014 * XXX: RTV_MTU is overloaded, since the admin can set it
1015 * to turn off PMTU for a route, and the kernel can
1016 * set it to indicate a serious problem with PMTU
1017 * on a route. We should be using a separate flag
1018 * for the kernel to indicate this.
1019 */
1020
1021 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1022 if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
1023 rt->rt_rmx.rmx_locks |= RTV_MTU;
1024 else if (rt->rt_rmx.rmx_mtu > mtu ||
1025 rt->rt_rmx.rmx_mtu == 0) {
1026 icmpstat.icps_pmtuchg++;
1027 rt->rt_rmx.rmx_mtu = mtu;
1028 }
1029 }
1030
1031 if (rt)
1032 rtfree(rt);
1033
1034 /*
1035 * Notify protocols that the MTU for this destination
1036 * has changed.
1037 */
1038 for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
1039 mc = LIST_NEXT(mc, mc_list))
1040 (*mc->mc_func)(faddr);
1041 }
1042
1043 /*
1044 * Return the next larger or smaller MTU plateau (table from RFC 1191)
1045 * given current value MTU. If DIR is less than zero, a larger plateau
1046 * is returned; otherwise, a smaller value is returned.
1047 */
1048 int
1049 ip_next_mtu(mtu, dir) /* XXX */
1050 int mtu;
1051 int dir;
1052 {
1053 int i;
1054
1055 for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
1056 if (mtu >= mtu_table[i])
1057 break;
1058 }
1059
1060 if (dir < 0) {
1061 if (i == 0) {
1062 return 0;
1063 } else {
1064 return mtu_table[i - 1];
1065 }
1066 } else {
1067 if (mtu_table[i] == 0) {
1068 return 0;
1069 } else if (mtu > mtu_table[i]) {
1070 return mtu_table[i];
1071 } else {
1072 return mtu_table[i + 1];
1073 }
1074 }
1075 }
1076
1077 static void
1078 icmp_mtudisc_timeout(rt, r)
1079 struct rtentry *rt;
1080 struct rttimer *r;
1081 {
1082 if (rt == NULL)
1083 panic("icmp_mtudisc_timeout: bad route to timeout");
1084 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1085 (RTF_DYNAMIC | RTF_HOST)) {
1086 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1087 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1088 } else {
1089 if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
1090 rt->rt_rmx.rmx_mtu = 0;
1091 }
1092 }
1093 }
1094
1095 static void
1096 icmp_redirect_timeout(rt, r)
1097 struct rtentry *rt;
1098 struct rttimer *r;
1099 {
1100 if (rt == NULL)
1101 panic("icmp_redirect_timeout: bad route to timeout");
1102 if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
1103 (RTF_DYNAMIC | RTF_HOST)) {
1104 rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
1105 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
1106 }
1107 }
1108
1109 /*
1110 * Perform rate limit check.
1111 * Returns 0 if it is okay to send the icmp packet.
1112 * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
1113 * limitation.
1114 *
1115 * XXX per-destination/type check necessary?
1116 */
1117 static int
1118 icmp_ratelimit(dst, type, code)
1119 const struct in_addr *dst;
1120 const int type; /* not used at this moment */
1121 const int code; /* not used at this moment */
1122 {
1123
1124 /* PPS limit */
1125 if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
1126 icmperrppslim)) {
1127 /* The packet is subject to rate limit */
1128 return 1;
1129 }
1130
1131 /*okay to send*/
1132 return 0;
1133 }
1134