Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.69
      1 /*	$NetBSD: ip_icmp.c,v 1.69 2002/06/30 22:40:34 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include <sys/cdefs.h>
    108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.69 2002/06/30 22:40:34 thorpej Exp $");
    109 
    110 #include "opt_ipsec.h"
    111 
    112 #include <sys/param.h>
    113 #include <sys/systm.h>
    114 #include <sys/malloc.h>
    115 #include <sys/mbuf.h>
    116 #include <sys/protosw.h>
    117 #include <sys/socket.h>
    118 #include <sys/time.h>
    119 #include <sys/kernel.h>
    120 #include <sys/syslog.h>
    121 #include <sys/sysctl.h>
    122 
    123 #include <net/if.h>
    124 #include <net/route.h>
    125 
    126 #include <netinet/in.h>
    127 #include <netinet/in_systm.h>
    128 #include <netinet/in_var.h>
    129 #include <netinet/ip.h>
    130 #include <netinet/ip_icmp.h>
    131 #include <netinet/ip_var.h>
    132 #include <netinet/in_pcb.h>
    133 #include <netinet/icmp_var.h>
    134 
    135 #ifdef IPSEC
    136 #include <netinet6/ipsec.h>
    137 #include <netkey/key.h>
    138 #endif
    139 
    140 #include <machine/stdarg.h>
    141 
    142 /*
    143  * ICMP routines: error generation, receive packet processing, and
    144  * routines to turnaround packets back to the originator, and
    145  * host table maintenance routines.
    146  */
    147 
    148 int	icmpmaskrepl = 0;
    149 #ifdef ICMPPRINTFS
    150 int	icmpprintfs = 0;
    151 #endif
    152 int	icmpreturndatabytes = 8;
    153 
    154 /*
    155  * List of callbacks to notify when Path MTU changes are made.
    156  */
    157 struct icmp_mtudisc_callback {
    158 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;
    159 	void (*mc_func) __P((struct in_addr));
    160 };
    161 
    162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    163     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
    164 
    165 #if 0
    166 static int	ip_next_mtu __P((int, int));
    167 #else
    168 /*static*/ int	ip_next_mtu __P((int, int));
    169 #endif
    170 
    171 extern int icmperrppslim;
    172 static int icmperrpps_count = 0;
    173 static struct timeval icmperrppslim_last;
    174 static int icmp_rediraccept = 1;
    175 static int icmp_redirtimeout = 600;
    176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
    177 
    178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
    180 
    181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    182 
    183 
    184 void
    185 icmp_init()
    186 {
    187 	/*
    188 	 * This is only useful if the user initializes redirtimeout to
    189 	 * something other than zero.
    190 	 */
    191 	if (icmp_redirtimeout != 0) {
    192 		icmp_redirect_timeout_q =
    193 			rt_timer_queue_create(icmp_redirtimeout);
    194 	}
    195 }
    196 
    197 /*
    198  * Register a Path MTU Discovery callback.
    199  */
    200 void
    201 icmp_mtudisc_callback_register(func)
    202 	void (*func) __P((struct in_addr));
    203 {
    204 	struct icmp_mtudisc_callback *mc;
    205 
    206 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    207 	     mc = LIST_NEXT(mc, mc_list)) {
    208 		if (mc->mc_func == func)
    209 			return;
    210 	}
    211 
    212 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    213 	if (mc == NULL)
    214 		panic("icmp_mtudisc_callback_register");
    215 
    216 	mc->mc_func = func;
    217 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    218 }
    219 
    220 /*
    221  * Generate an error packet of type error
    222  * in response to bad packet ip.
    223  */
    224 void
    225 icmp_error(n, type, code, dest, destifp)
    226 	struct mbuf *n;
    227 	int type, code;
    228 	n_long dest;
    229 	struct ifnet *destifp;
    230 {
    231 	struct ip *oip = mtod(n, struct ip *), *nip;
    232 	unsigned oiplen = oip->ip_hl << 2;
    233 	struct icmp *icp;
    234 	struct mbuf *m;
    235 	unsigned icmplen, mblen;
    236 
    237 #ifdef ICMPPRINTFS
    238 	if (icmpprintfs)
    239 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    240 #endif
    241 	if (type != ICMP_REDIRECT)
    242 		icmpstat.icps_error++;
    243 	/*
    244 	 * Don't send error if the original packet was encrypted.
    245 	 * Don't send error if not the first fragment of message.
    246 	 * Don't error if the old packet protocol was ICMP
    247 	 * error message, only known informational types.
    248 	 */
    249 	if (n->m_flags & M_DECRYPTED)
    250 		goto freeit;
    251 	if (oip->ip_off &~ (IP_MF|IP_DF))
    252 		goto freeit;
    253 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    254 	  n->m_len >= oiplen + ICMP_MINLEN &&
    255 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    256 		icmpstat.icps_oldicmp++;
    257 		goto freeit;
    258 	}
    259 	/* Don't send error in response to a multicast or broadcast packet */
    260 	if (n->m_flags & (M_BCAST|M_MCAST))
    261 		goto freeit;
    262 
    263 	/*
    264 	 * First, do a rate limitation check.
    265 	 */
    266 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    267 		/* XXX stat */
    268 		goto freeit;
    269 	}
    270 
    271 	/*
    272 	 * Now, formulate icmp message
    273 	 */
    274 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    275 	/*
    276 	 * Defend against mbuf chains shorter than oip->ip_len:
    277 	 */
    278 	mblen = 0;
    279 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    280 		mblen += m->m_len;
    281 	icmplen = min(mblen, icmplen);
    282 
    283 	/*
    284 	 * As we are not required to return everything we have,
    285 	 * we return whatever we can return at ease.
    286 	 *
    287 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    288 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    289 	 * icmp_sysctl will keep things below that limit.
    290 	 */
    291 
    292 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    293 
    294 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    295 		icmplen = MCLBYTES - ICMP_MINLEN;
    296 
    297 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    298 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    299 		MCLGET(m, M_DONTWAIT);
    300 		if ((m->m_flags & M_EXT) == 0) {
    301 			m_freem(m);
    302 			m = NULL;
    303 		}
    304 	}
    305 	if (m == NULL)
    306 		goto freeit;
    307 	m->m_len = icmplen + ICMP_MINLEN;
    308 	if ((m->m_flags & M_EXT) == 0)
    309 		MH_ALIGN(m, m->m_len);
    310 	icp = mtod(m, struct icmp *);
    311 	if ((u_int)type > ICMP_MAXTYPE)
    312 		panic("icmp_error");
    313 	icmpstat.icps_outhist[type]++;
    314 	icp->icmp_type = type;
    315 	if (type == ICMP_REDIRECT)
    316 		icp->icmp_gwaddr.s_addr = dest;
    317 	else {
    318 		icp->icmp_void = 0;
    319 		/*
    320 		 * The following assignments assume an overlay with the
    321 		 * zeroed icmp_void field.
    322 		 */
    323 		if (type == ICMP_PARAMPROB) {
    324 			icp->icmp_pptr = code;
    325 			code = 0;
    326 		} else if (type == ICMP_UNREACH &&
    327 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    328 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    329 	}
    330 
    331 	HTONS(oip->ip_off);
    332 	HTONS(oip->ip_len);
    333 	icp->icmp_code = code;
    334 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    335 	nip = &icp->icmp_ip;
    336 
    337 	/*
    338 	 * Now, copy old ip header (without options)
    339 	 * in front of icmp message.
    340 	 */
    341 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    342 		panic("icmp len");
    343 	m->m_data -= sizeof(struct ip);
    344 	m->m_len += sizeof(struct ip);
    345 	m->m_pkthdr.len = m->m_len;
    346 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    347 	nip = mtod(m, struct ip *);
    348 	/* ip_v set in ip_output */
    349 	nip->ip_hl = sizeof(struct ip) >> 2;
    350 	nip->ip_tos = 0;
    351 	nip->ip_len = m->m_len;
    352 	/* ip_id set in ip_output */
    353 	nip->ip_off = 0;
    354 	/* ip_ttl set in icmp_reflect */
    355 	nip->ip_p = IPPROTO_ICMP;
    356 	nip->ip_src = oip->ip_src;
    357 	nip->ip_dst = oip->ip_dst;
    358 	icmp_reflect(m);
    359 
    360 freeit:
    361 	m_freem(n);
    362 }
    363 
    364 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    365 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    366 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    367 struct sockaddr_in icmpmask = { 8, 0 };
    368 
    369 /*
    370  * Process a received ICMP message.
    371  */
    372 void
    373 #if __STDC__
    374 icmp_input(struct mbuf *m, ...)
    375 #else
    376 icmp_input(m, va_alist)
    377 	struct mbuf *m;
    378 	va_dcl
    379 #endif
    380 {
    381 	int proto;
    382 	struct icmp *icp;
    383 	struct ip *ip = mtod(m, struct ip *);
    384 	int icmplen;
    385 	int i;
    386 	struct in_ifaddr *ia;
    387 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    388 	int code;
    389 	int hlen;
    390 	va_list ap;
    391 	struct rtentry *rt;
    392 
    393 	va_start(ap, m);
    394 	hlen = va_arg(ap, int);
    395 	proto = va_arg(ap, int);
    396 	va_end(ap);
    397 
    398 	/*
    399 	 * Locate icmp structure in mbuf, and check
    400 	 * that not corrupted and of at least minimum length.
    401 	 */
    402 	icmplen = ip->ip_len - hlen;
    403 #ifdef ICMPPRINTFS
    404 	if (icmpprintfs)
    405 		printf("icmp_input from %x to %x, len %d\n",
    406 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    407 		    icmplen);
    408 #endif
    409 	if (icmplen < ICMP_MINLEN) {
    410 		icmpstat.icps_tooshort++;
    411 		goto freeit;
    412 	}
    413 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    414 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    415 		icmpstat.icps_tooshort++;
    416 		return;
    417 	}
    418 	ip = mtod(m, struct ip *);
    419 	m->m_len -= hlen;
    420 	m->m_data += hlen;
    421 	icp = mtod(m, struct icmp *);
    422 	/* Don't need to assert alignment, here. */
    423 	if (in_cksum(m, icmplen)) {
    424 		icmpstat.icps_checksum++;
    425 		goto freeit;
    426 	}
    427 	m->m_len += hlen;
    428 	m->m_data -= hlen;
    429 
    430 #ifdef ICMPPRINTFS
    431 	/*
    432 	 * Message type specific processing.
    433 	 */
    434 	if (icmpprintfs)
    435 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    436 		    icp->icmp_code);
    437 #endif
    438 	if (icp->icmp_type > ICMP_MAXTYPE)
    439 		goto raw;
    440 	icmpstat.icps_inhist[icp->icmp_type]++;
    441 	code = icp->icmp_code;
    442 	switch (icp->icmp_type) {
    443 
    444 	case ICMP_UNREACH:
    445 		switch (code) {
    446 			case ICMP_UNREACH_NET:
    447 			case ICMP_UNREACH_HOST:
    448 			case ICMP_UNREACH_PROTOCOL:
    449 			case ICMP_UNREACH_PORT:
    450 			case ICMP_UNREACH_SRCFAIL:
    451 				code += PRC_UNREACH_NET;
    452 				break;
    453 
    454 			case ICMP_UNREACH_NEEDFRAG:
    455 				code = PRC_MSGSIZE;
    456 				break;
    457 
    458 			case ICMP_UNREACH_NET_UNKNOWN:
    459 			case ICMP_UNREACH_NET_PROHIB:
    460 			case ICMP_UNREACH_TOSNET:
    461 				code = PRC_UNREACH_NET;
    462 				break;
    463 
    464 			case ICMP_UNREACH_HOST_UNKNOWN:
    465 			case ICMP_UNREACH_ISOLATED:
    466 			case ICMP_UNREACH_HOST_PROHIB:
    467 			case ICMP_UNREACH_TOSHOST:
    468 				code = PRC_UNREACH_HOST;
    469 				break;
    470 
    471 			default:
    472 				goto badcode;
    473 		}
    474 		goto deliver;
    475 
    476 	case ICMP_TIMXCEED:
    477 		if (code > 1)
    478 			goto badcode;
    479 		code += PRC_TIMXCEED_INTRANS;
    480 		goto deliver;
    481 
    482 	case ICMP_PARAMPROB:
    483 		if (code > 1)
    484 			goto badcode;
    485 		code = PRC_PARAMPROB;
    486 		goto deliver;
    487 
    488 	case ICMP_SOURCEQUENCH:
    489 		if (code)
    490 			goto badcode;
    491 		code = PRC_QUENCH;
    492 		goto deliver;
    493 
    494 	deliver:
    495 		/*
    496 		 * Problem with datagram; advise higher level routines.
    497 		 */
    498 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    499 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    500 			icmpstat.icps_badlen++;
    501 			goto freeit;
    502 		}
    503 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    504 			goto badcode;
    505 		NTOHS(icp->icmp_ip.ip_len);
    506 #ifdef ICMPPRINTFS
    507 		if (icmpprintfs)
    508 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    509 #endif
    510 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    511 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    512 		if (ctlfunc)
    513 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
    514 			    &icp->icmp_ip);
    515 		break;
    516 
    517 	badcode:
    518 		icmpstat.icps_badcode++;
    519 		break;
    520 
    521 	case ICMP_ECHO:
    522 		icp->icmp_type = ICMP_ECHOREPLY;
    523 		goto reflect;
    524 
    525 	case ICMP_TSTAMP:
    526 		if (icmplen < ICMP_TSLEN) {
    527 			icmpstat.icps_badlen++;
    528 			break;
    529 		}
    530 		icp->icmp_type = ICMP_TSTAMPREPLY;
    531 		icp->icmp_rtime = iptime();
    532 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    533 		goto reflect;
    534 
    535 	case ICMP_MASKREQ:
    536 		if (icmpmaskrepl == 0)
    537 			break;
    538 		/*
    539 		 * We are not able to respond with all ones broadcast
    540 		 * unless we receive it over a point-to-point interface.
    541 		 */
    542 		if (icmplen < ICMP_MASKLEN) {
    543 			icmpstat.icps_badlen++;
    544 			break;
    545 		}
    546 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    547 		    in_nullhost(ip->ip_dst))
    548 			icmpdst.sin_addr = ip->ip_src;
    549 		else
    550 			icmpdst.sin_addr = ip->ip_dst;
    551 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    552 		    m->m_pkthdr.rcvif));
    553 		if (ia == 0)
    554 			break;
    555 		icp->icmp_type = ICMP_MASKREPLY;
    556 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    557 		if (in_nullhost(ip->ip_src)) {
    558 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    559 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    560 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    561 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    562 		}
    563 reflect:
    564 		icmpstat.icps_reflect++;
    565 		icmpstat.icps_outhist[icp->icmp_type]++;
    566 		icmp_reflect(m);
    567 		return;
    568 
    569 	case ICMP_REDIRECT:
    570 		if (code > 3)
    571 			goto badcode;
    572 		if (icmp_rediraccept == 0)
    573 			goto freeit;
    574 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    575 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    576 			icmpstat.icps_badlen++;
    577 			break;
    578 		}
    579 		/*
    580 		 * Short circuit routing redirects to force
    581 		 * immediate change in the kernel's routing
    582 		 * tables.  The message is also handed to anyone
    583 		 * listening on a raw socket (e.g. the routing
    584 		 * daemon for use in updating its tables).
    585 		 */
    586 		icmpgw.sin_addr = ip->ip_src;
    587 		icmpdst.sin_addr = icp->icmp_gwaddr;
    588 #ifdef	ICMPPRINTFS
    589 		if (icmpprintfs)
    590 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    591 			    icp->icmp_gwaddr);
    592 #endif
    593 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    594 		rt = NULL;
    595 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    596 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    597 		    sintosa(&icmpgw), (struct rtentry **)&rt);
    598 		if (rt != NULL && icmp_redirtimeout != 0) {
    599 			i = rt_timer_add(rt, icmp_redirect_timeout,
    600 					 icmp_redirect_timeout_q);
    601 			if (i)
    602 				log(LOG_ERR, "ICMP:  redirect failed to "
    603 				    "register timeout for route to %x, "
    604 				    "code %d\n",
    605 				    icp->icmp_ip.ip_dst.s_addr, i);
    606 		}
    607 		if (rt != NULL)
    608 			rtfree(rt);
    609 
    610 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    611 #ifdef IPSEC
    612 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    613 #endif
    614 		break;
    615 
    616 	/*
    617 	 * No kernel processing for the following;
    618 	 * just fall through to send to raw listener.
    619 	 */
    620 	case ICMP_ECHOREPLY:
    621 	case ICMP_ROUTERADVERT:
    622 	case ICMP_ROUTERSOLICIT:
    623 	case ICMP_TSTAMPREPLY:
    624 	case ICMP_IREQREPLY:
    625 	case ICMP_MASKREPLY:
    626 	default:
    627 		break;
    628 	}
    629 
    630 raw:
    631 	rip_input(m, hlen, proto);
    632 	return;
    633 
    634 freeit:
    635 	m_freem(m);
    636 	return;
    637 }
    638 
    639 /*
    640  * Reflect the ip packet back to the source
    641  */
    642 void
    643 icmp_reflect(m)
    644 	struct mbuf *m;
    645 {
    646 	struct ip *ip = mtod(m, struct ip *);
    647 	struct in_ifaddr *ia;
    648 	struct ifaddr *ifa;
    649 	struct sockaddr_in *sin = 0;
    650 	struct in_addr t;
    651 	struct mbuf *opts = 0;
    652 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    653 
    654 	if (!in_canforward(ip->ip_src) &&
    655 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    656 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    657 		m_freem(m);	/* Bad return address */
    658 		goto done;	/* ip_output() will check for broadcast */
    659 	}
    660 	t = ip->ip_dst;
    661 	ip->ip_dst = ip->ip_src;
    662 	/*
    663 	 * If the incoming packet was addressed directly to us, use
    664 	 * dst as the src for the reply.  Otherwise (broadcast or
    665 	 * anonymous), use an address which corresponds to the
    666 	 * incoming interface, with a preference for the address which
    667 	 * corresponds to the route to the destination of the ICMP.
    668 	 */
    669 
    670 	/* Look for packet addressed to us */
    671 	INADDR_TO_IA(t, ia);
    672 
    673 	/* look for packet sent to broadcast address */
    674 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    675 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    676 			if (ifa->ifa_addr->sa_family != AF_INET)
    677 				continue;
    678 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    679 				ia = ifatoia(ifa);
    680 				break;
    681 			}
    682 		}
    683 	}
    684 
    685 	if (ia)
    686 		sin = &ia->ia_addr;
    687 
    688 	icmpdst.sin_addr = t;
    689 
    690 	/* if the packet is addressed somewhere else, compute the
    691 	   source address for packets routed back to the source, and
    692 	   use that, if it's an address on the interface which
    693 	   received the packet */
    694 	if (sin == (struct sockaddr_in *)0) {
    695 		struct sockaddr_in sin_dst;
    696 		struct route icmproute;
    697 		int errornum;
    698 
    699 		sin_dst.sin_family = AF_INET;
    700 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    701 		sin_dst.sin_addr = ip->ip_dst;
    702 		bzero(&icmproute, sizeof(icmproute));
    703 		errornum = 0;
    704 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    705 		/* errornum is never used */
    706 		if (icmproute.ro_rt)
    707 			RTFREE(icmproute.ro_rt);
    708 		/* check to make sure sin is a source address on rcvif */
    709 		if (sin) {
    710 			t = sin->sin_addr;
    711 			sin = (struct sockaddr_in *)0;
    712 			INADDR_TO_IA(t, ia);
    713 			while (ia) {
    714 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    715 					sin = &ia->ia_addr;
    716 					break;
    717 				}
    718 				NEXT_IA_WITH_SAME_ADDR(ia);
    719 			}
    720 		}
    721 	}
    722 
    723 	/* if it was not addressed to us, but the route doesn't go out
    724 	   the source interface, pick an address on the source
    725 	   interface.  This can happen when routing is asymmetric, or
    726 	   when the incoming packet was encapsulated */
    727 	if (sin == (struct sockaddr_in *)0) {
    728 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    729 			if (ifa->ifa_addr->sa_family != AF_INET)
    730 				continue;
    731 			sin = &(ifatoia(ifa)->ia_addr);
    732 			break;
    733 		}
    734 	}
    735 
    736 	/*
    737 	 * The following happens if the packet was not addressed to us,
    738 	 * and was received on an interface with no IP address:
    739 	 * We find the first AF_INET address on the first non-loopback
    740 	 * interface.
    741 	 */
    742 	if (sin == (struct sockaddr_in *)0)
    743 		TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
    744 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    745 				continue;
    746 			sin = &ia->ia_addr;
    747 			break;
    748 		}
    749 
    750 	/*
    751 	 * If we still didn't find an address, punt.  We could have an
    752 	 * interface up (and receiving packets) with no address.
    753 	 */
    754 	if (sin == (struct sockaddr_in *)0) {
    755 		m_freem(m);
    756 		goto done;
    757 	}
    758 
    759 	ip->ip_src = sin->sin_addr;
    760 	ip->ip_ttl = MAXTTL;
    761 
    762 	if (optlen > 0) {
    763 		u_char *cp;
    764 		int opt, cnt;
    765 		u_int len;
    766 
    767 		/*
    768 		 * Retrieve any source routing from the incoming packet;
    769 		 * add on any record-route or timestamp options.
    770 		 */
    771 		cp = (u_char *) (ip + 1);
    772 		if ((opts = ip_srcroute()) == 0 &&
    773 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    774 			opts->m_len = sizeof(struct in_addr);
    775 			*mtod(opts, struct in_addr *) = zeroin_addr;
    776 		}
    777 		if (opts) {
    778 #ifdef ICMPPRINTFS
    779 		    if (icmpprintfs)
    780 			    printf("icmp_reflect optlen %d rt %d => ",
    781 				optlen, opts->m_len);
    782 #endif
    783 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    784 			    opt = cp[IPOPT_OPTVAL];
    785 			    if (opt == IPOPT_EOL)
    786 				    break;
    787 			    if (opt == IPOPT_NOP)
    788 				    len = 1;
    789 			    else {
    790 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    791 					    break;
    792 				    len = cp[IPOPT_OLEN];
    793 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    794 				        len > cnt)
    795 					    break;
    796 			    }
    797 			    /*
    798 			     * Should check for overflow, but it "can't happen"
    799 			     */
    800 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    801 				opt == IPOPT_SECURITY) {
    802 				    bcopy((caddr_t)cp,
    803 					mtod(opts, caddr_t) + opts->m_len, len);
    804 				    opts->m_len += len;
    805 			    }
    806 		    }
    807 		    /* Terminate & pad, if necessary */
    808 		    if ((cnt = opts->m_len % 4) != 0) {
    809 			    for (; cnt < 4; cnt++) {
    810 				    *(mtod(opts, caddr_t) + opts->m_len) =
    811 					IPOPT_EOL;
    812 				    opts->m_len++;
    813 			    }
    814 		    }
    815 #ifdef ICMPPRINTFS
    816 		    if (icmpprintfs)
    817 			    printf("%d\n", opts->m_len);
    818 #endif
    819 		}
    820 		/*
    821 		 * Now strip out original options by copying rest of first
    822 		 * mbuf's data back, and adjust the IP length.
    823 		 */
    824 		ip->ip_len -= optlen;
    825 		ip->ip_hl = sizeof(struct ip) >> 2;
    826 		m->m_len -= optlen;
    827 		if (m->m_flags & M_PKTHDR)
    828 			m->m_pkthdr.len -= optlen;
    829 		optlen += sizeof(struct ip);
    830 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    831 			 (unsigned)(m->m_len - sizeof(struct ip)));
    832 	}
    833 	m->m_flags &= ~(M_BCAST|M_MCAST);
    834 	icmp_send(m, opts);
    835 done:
    836 	if (opts)
    837 		(void)m_free(opts);
    838 }
    839 
    840 /*
    841  * Send an icmp packet back to the ip level,
    842  * after supplying a checksum.
    843  */
    844 void
    845 icmp_send(m, opts)
    846 	struct mbuf *m;
    847 	struct mbuf *opts;
    848 {
    849 	struct ip *ip = mtod(m, struct ip *);
    850 	int hlen;
    851 	struct icmp *icp;
    852 
    853 	hlen = ip->ip_hl << 2;
    854 	m->m_data += hlen;
    855 	m->m_len -= hlen;
    856 	icp = mtod(m, struct icmp *);
    857 	icp->icmp_cksum = 0;
    858 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    859 	m->m_data -= hlen;
    860 	m->m_len += hlen;
    861 #ifdef ICMPPRINTFS
    862 	if (icmpprintfs)
    863 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    864 #endif
    865 #ifdef IPSEC
    866 	/* Don't lookup socket */
    867 	(void)ipsec_setsocket(m, NULL);
    868 #endif
    869 	(void) ip_output(m, opts, NULL, 0, NULL);
    870 }
    871 
    872 n_time
    873 iptime()
    874 {
    875 	struct timeval atv;
    876 	u_long t;
    877 
    878 	microtime(&atv);
    879 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    880 	return (htonl(t));
    881 }
    882 
    883 int
    884 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    885 	int *name;
    886 	u_int namelen;
    887 	void *oldp;
    888 	size_t *oldlenp;
    889 	void *newp;
    890 	size_t newlen;
    891 {
    892 	int arg, error;
    893 
    894 	/* All sysctl names at this level are terminal. */
    895 	if (namelen != 1)
    896 		return (ENOTDIR);
    897 
    898 	switch (name[0])
    899 	{
    900 	case ICMPCTL_MASKREPL:
    901 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    902 		break;
    903 	case ICMPCTL_RETURNDATABYTES:
    904 		arg = icmpreturndatabytes;
    905 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    906 		if (error)
    907 			break;
    908 		if ((arg >= 8) || (arg <= 512))
    909 			icmpreturndatabytes = arg;
    910 		else
    911 			error = EINVAL;
    912 		break;
    913 	case ICMPCTL_ERRPPSLIMIT:
    914 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    915 		break;
    916 	case ICMPCTL_REDIRACCEPT:
    917 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    918 				   &icmp_rediraccept);
    919 		break;
    920 	case ICMPCTL_REDIRTIMEOUT:
    921 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    922 				   &icmp_redirtimeout);
    923 		if (icmp_redirect_timeout_q != NULL) {
    924 			if (icmp_redirtimeout == 0) {
    925 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
    926 						       TRUE);
    927 				icmp_redirect_timeout_q = NULL;
    928 			} else {
    929 				rt_timer_queue_change(icmp_redirect_timeout_q,
    930 						      icmp_redirtimeout);
    931 			}
    932 		} else if (icmp_redirtimeout > 0) {
    933 			icmp_redirect_timeout_q =
    934 				rt_timer_queue_create(icmp_redirtimeout);
    935 		}
    936 		return (error);
    937 
    938 		break;
    939 	default:
    940 		error = ENOPROTOOPT;
    941 		break;
    942 	}
    943 	return error;
    944 }
    945 
    946 /* Table of common MTUs: */
    947 
    948 static const u_int mtu_table[] = {
    949 	65535, 65280, 32000, 17914, 9180, 8166,
    950 	4352, 2002, 1492, 1006, 508, 296, 68, 0
    951 };
    952 
    953 void
    954 icmp_mtudisc(icp, faddr)
    955 	struct icmp *icp;
    956 	struct in_addr faddr;
    957 {
    958 	struct icmp_mtudisc_callback *mc;
    959 	struct sockaddr *dst = sintosa(&icmpsrc);
    960 	struct rtentry *rt;
    961 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    962 	int    error;
    963 
    964 	rt = rtalloc1(dst, 1);
    965 	if (rt == 0)
    966 		return;
    967 
    968 	/* If we didn't get a host route, allocate one */
    969 
    970 	if ((rt->rt_flags & RTF_HOST) == 0) {
    971 		struct rtentry *nrt;
    972 
    973 		error = rtrequest((int) RTM_ADD, dst,
    974 		    (struct sockaddr *) rt->rt_gateway,
    975 		    (struct sockaddr *) 0,
    976 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    977 		if (error) {
    978 			rtfree(rt);
    979 			return;
    980 		}
    981 		nrt->rt_rmx = rt->rt_rmx;
    982 		rtfree(rt);
    983 		rt = nrt;
    984 	}
    985 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    986 	if (error) {
    987 		rtfree(rt);
    988 		return;
    989 	}
    990 
    991 	if (mtu == 0) {
    992 		int i = 0;
    993 
    994 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    995 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    996 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    997 			mtu -= (icp->icmp_ip.ip_hl << 2);
    998 
    999 		/* If we still can't guess a value, try the route */
   1000 
   1001 		if (mtu == 0) {
   1002 			mtu = rt->rt_rmx.rmx_mtu;
   1003 
   1004 			/* If no route mtu, default to the interface mtu */
   1005 
   1006 			if (mtu == 0)
   1007 				mtu = rt->rt_ifp->if_mtu;
   1008 		}
   1009 
   1010 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
   1011 			if (mtu > mtu_table[i]) {
   1012 				mtu = mtu_table[i];
   1013 				break;
   1014 			}
   1015 	}
   1016 
   1017 	/*
   1018 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
   1019 	 *	  to turn off PMTU for a route, and the kernel can
   1020 	 *	  set it to indicate a serious problem with PMTU
   1021 	 *	  on a route.  We should be using a separate flag
   1022 	 *	  for the kernel to indicate this.
   1023 	 */
   1024 
   1025 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1026 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
   1027 			rt->rt_rmx.rmx_locks |= RTV_MTU;
   1028 		else if (rt->rt_rmx.rmx_mtu > mtu ||
   1029 			 rt->rt_rmx.rmx_mtu == 0) {
   1030 			icmpstat.icps_pmtuchg++;
   1031 			rt->rt_rmx.rmx_mtu = mtu;
   1032 		}
   1033 	}
   1034 
   1035 	if (rt)
   1036 		rtfree(rt);
   1037 
   1038 	/*
   1039 	 * Notify protocols that the MTU for this destination
   1040 	 * has changed.
   1041 	 */
   1042 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
   1043 	     mc = LIST_NEXT(mc, mc_list))
   1044 		(*mc->mc_func)(faddr);
   1045 }
   1046 
   1047 /*
   1048  * Return the next larger or smaller MTU plateau (table from RFC 1191)
   1049  * given current value MTU.  If DIR is less than zero, a larger plateau
   1050  * is returned; otherwise, a smaller value is returned.
   1051  */
   1052 int
   1053 ip_next_mtu(mtu, dir)	/* XXX */
   1054 	int mtu;
   1055 	int dir;
   1056 {
   1057 	int i;
   1058 
   1059 	for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
   1060 		if (mtu >= mtu_table[i])
   1061 			break;
   1062 	}
   1063 
   1064 	if (dir < 0) {
   1065 		if (i == 0) {
   1066 			return 0;
   1067 		} else {
   1068 			return mtu_table[i - 1];
   1069 		}
   1070 	} else {
   1071 		if (mtu_table[i] == 0) {
   1072 			return 0;
   1073 		} else if (mtu > mtu_table[i]) {
   1074 			return mtu_table[i];
   1075 		} else {
   1076 			return mtu_table[i + 1];
   1077 		}
   1078 	}
   1079 }
   1080 
   1081 static void
   1082 icmp_mtudisc_timeout(rt, r)
   1083 	struct rtentry *rt;
   1084 	struct rttimer *r;
   1085 {
   1086 	if (rt == NULL)
   1087 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1088 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1089 	    (RTF_DYNAMIC | RTF_HOST)) {
   1090 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1091 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1092 	} else {
   1093 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1094 			rt->rt_rmx.rmx_mtu = 0;
   1095 		}
   1096 	}
   1097 }
   1098 
   1099 static void
   1100 icmp_redirect_timeout(rt, r)
   1101 	struct rtentry *rt;
   1102 	struct rttimer *r;
   1103 {
   1104 	if (rt == NULL)
   1105 		panic("icmp_redirect_timeout:  bad route to timeout");
   1106 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1107 	    (RTF_DYNAMIC | RTF_HOST)) {
   1108 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1109 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1110 	}
   1111 }
   1112 
   1113 /*
   1114  * Perform rate limit check.
   1115  * Returns 0 if it is okay to send the icmp packet.
   1116  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1117  * limitation.
   1118  *
   1119  * XXX per-destination/type check necessary?
   1120  */
   1121 static int
   1122 icmp_ratelimit(dst, type, code)
   1123 	const struct in_addr *dst;
   1124 	const int type;			/* not used at this moment */
   1125 	const int code;			/* not used at this moment */
   1126 {
   1127 
   1128 	/* PPS limit */
   1129 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1130 	    icmperrppslim)) {
   1131 		/* The packet is subject to rate limit */
   1132 		return 1;
   1133 	}
   1134 
   1135 	/*okay to send*/
   1136 	return 0;
   1137 }
   1138