Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.67
      1 /*	$NetBSD: ip_icmp.c,v 1.67 2002/06/09 16:33:40 itojun Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include <sys/cdefs.h>
    108 __KERNEL_RCSID(0, "$NetBSD: ip_icmp.c,v 1.67 2002/06/09 16:33:40 itojun Exp $");
    109 
    110 #include "opt_ipsec.h"
    111 
    112 #include <sys/param.h>
    113 #include <sys/systm.h>
    114 #include <sys/malloc.h>
    115 #include <sys/mbuf.h>
    116 #include <sys/protosw.h>
    117 #include <sys/socket.h>
    118 #include <sys/time.h>
    119 #include <sys/kernel.h>
    120 #include <sys/syslog.h>
    121 #include <sys/sysctl.h>
    122 
    123 #include <net/if.h>
    124 #include <net/route.h>
    125 
    126 #include <netinet/in.h>
    127 #include <netinet/in_systm.h>
    128 #include <netinet/in_var.h>
    129 #include <netinet/ip.h>
    130 #include <netinet/ip_icmp.h>
    131 #include <netinet/ip_var.h>
    132 #include <netinet/in_pcb.h>
    133 #include <netinet/icmp_var.h>
    134 
    135 #ifdef IPSEC
    136 #include <netinet6/ipsec.h>
    137 #include <netkey/key.h>
    138 #endif
    139 
    140 #include <machine/stdarg.h>
    141 
    142 /*
    143  * ICMP routines: error generation, receive packet processing, and
    144  * routines to turnaround packets back to the originator, and
    145  * host table maintenance routines.
    146  */
    147 
    148 int	icmpmaskrepl = 0;
    149 #ifdef ICMPPRINTFS
    150 int	icmpprintfs = 0;
    151 #endif
    152 int	icmpreturndatabytes = 8;
    153 
    154 /*
    155  * List of callbacks to notify when Path MTU changes are made.
    156  */
    157 struct icmp_mtudisc_callback {
    158 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;
    159 	void (*mc_func) __P((struct in_addr));
    160 };
    161 
    162 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    163     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
    164 
    165 #if 0
    166 static int	ip_next_mtu __P((int, int));
    167 #else
    168 /*static*/ int	ip_next_mtu __P((int, int));
    169 #endif
    170 
    171 extern int icmperrppslim;
    172 static int icmperrpps_count = 0;
    173 static struct timeval icmperrppslim_last;
    174 static int icmp_rediraccept = 1;
    175 static int icmp_redirtimeout = 0;
    176 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
    177 
    178 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    179 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
    180 
    181 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    182 
    183 
    184 void
    185 icmp_init()
    186 {
    187 	/*
    188 	 * This is only useful if the user initializes redirtimeout to
    189 	 * something other than zero.
    190 	 */
    191 	if (icmp_redirtimeout != 0) {
    192 		icmp_redirect_timeout_q =
    193 			rt_timer_queue_create(icmp_redirtimeout);
    194 	}
    195 }
    196 
    197 /*
    198  * Register a Path MTU Discovery callback.
    199  */
    200 void
    201 icmp_mtudisc_callback_register(func)
    202 	void (*func) __P((struct in_addr));
    203 {
    204 	struct icmp_mtudisc_callback *mc;
    205 
    206 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    207 	     mc = LIST_NEXT(mc, mc_list)) {
    208 		if (mc->mc_func == func)
    209 			return;
    210 	}
    211 
    212 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    213 	if (mc == NULL)
    214 		panic("icmp_mtudisc_callback_register");
    215 
    216 	mc->mc_func = func;
    217 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    218 }
    219 
    220 /*
    221  * Generate an error packet of type error
    222  * in response to bad packet ip.
    223  */
    224 void
    225 icmp_error(n, type, code, dest, destifp)
    226 	struct mbuf *n;
    227 	int type, code;
    228 	n_long dest;
    229 	struct ifnet *destifp;
    230 {
    231 	struct ip *oip = mtod(n, struct ip *), *nip;
    232 	unsigned oiplen = oip->ip_hl << 2;
    233 	struct icmp *icp;
    234 	struct mbuf *m;
    235 	unsigned icmplen, mblen;
    236 
    237 #ifdef ICMPPRINTFS
    238 	if (icmpprintfs)
    239 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    240 #endif
    241 	if (type != ICMP_REDIRECT)
    242 		icmpstat.icps_error++;
    243 	/*
    244 	 * Don't send error if the original packet was encrypted.
    245 	 * Don't send error if not the first fragment of message.
    246 	 * Don't error if the old packet protocol was ICMP
    247 	 * error message, only known informational types.
    248 	 */
    249 	if (n->m_flags & M_DECRYPTED)
    250 		goto freeit;
    251 	if (oip->ip_off &~ (IP_MF|IP_DF))
    252 		goto freeit;
    253 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    254 	  n->m_len >= oiplen + ICMP_MINLEN &&
    255 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    256 		icmpstat.icps_oldicmp++;
    257 		goto freeit;
    258 	}
    259 	/* Don't send error in response to a multicast or broadcast packet */
    260 	if (n->m_flags & (M_BCAST|M_MCAST))
    261 		goto freeit;
    262 
    263 	/*
    264 	 * First, do a rate limitation check.
    265 	 */
    266 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    267 		/* XXX stat */
    268 		goto freeit;
    269 	}
    270 
    271 	/*
    272 	 * Now, formulate icmp message
    273 	 */
    274 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    275 	/*
    276 	 * Defend against mbuf chains shorter than oip->ip_len:
    277 	 */
    278 	mblen = 0;
    279 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    280 		mblen += m->m_len;
    281 	icmplen = min(mblen, icmplen);
    282 
    283 	/*
    284 	 * As we are not required to return everything we have,
    285 	 * we return whatever we can return at ease.
    286 	 *
    287 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    288 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    289 	 * icmp_sysctl will keep things below that limit.
    290 	 */
    291 
    292 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    293 
    294 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    295 		icmplen = MCLBYTES - ICMP_MINLEN;
    296 
    297 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    298 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    299 		MCLGET(m, M_DONTWAIT);
    300 		if ((m->m_flags & M_EXT) == 0) {
    301 			m_freem(m);
    302 			m = NULL;
    303 		}
    304 	}
    305 	if (m == NULL)
    306 		goto freeit;
    307 	m->m_len = icmplen + ICMP_MINLEN;
    308 	if ((m->m_flags & M_EXT) == 0)
    309 		MH_ALIGN(m, m->m_len);
    310 	icp = mtod(m, struct icmp *);
    311 	if ((u_int)type > ICMP_MAXTYPE)
    312 		panic("icmp_error");
    313 	icmpstat.icps_outhist[type]++;
    314 	icp->icmp_type = type;
    315 	if (type == ICMP_REDIRECT)
    316 		icp->icmp_gwaddr.s_addr = dest;
    317 	else {
    318 		icp->icmp_void = 0;
    319 		/*
    320 		 * The following assignments assume an overlay with the
    321 		 * zeroed icmp_void field.
    322 		 */
    323 		if (type == ICMP_PARAMPROB) {
    324 			icp->icmp_pptr = code;
    325 			code = 0;
    326 		} else if (type == ICMP_UNREACH &&
    327 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    328 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    329 	}
    330 
    331 	HTONS(oip->ip_off);
    332 	HTONS(oip->ip_len);
    333 	icp->icmp_code = code;
    334 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    335 	nip = &icp->icmp_ip;
    336 
    337 	/*
    338 	 * Now, copy old ip header (without options)
    339 	 * in front of icmp message.
    340 	 */
    341 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    342 		panic("icmp len");
    343 	m->m_data -= sizeof(struct ip);
    344 	m->m_len += sizeof(struct ip);
    345 	m->m_pkthdr.len = m->m_len;
    346 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    347 	nip = mtod(m, struct ip *);
    348 	/* ip_v set in ip_output */
    349 	nip->ip_hl = sizeof(struct ip) >> 2;
    350 	nip->ip_tos = 0;
    351 	nip->ip_len = m->m_len;
    352 	/* ip_id set in ip_output */
    353 	nip->ip_off = 0;
    354 	/* ip_ttl set in icmp_reflect */
    355 	nip->ip_p = IPPROTO_ICMP;
    356 	nip->ip_src = oip->ip_src;
    357 	nip->ip_dst = oip->ip_dst;
    358 	icmp_reflect(m);
    359 
    360 freeit:
    361 	m_freem(n);
    362 }
    363 
    364 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    365 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    366 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    367 struct sockaddr_in icmpmask = { 8, 0 };
    368 
    369 /*
    370  * Process a received ICMP message.
    371  */
    372 void
    373 #if __STDC__
    374 icmp_input(struct mbuf *m, ...)
    375 #else
    376 icmp_input(m, va_alist)
    377 	struct mbuf *m;
    378 	va_dcl
    379 #endif
    380 {
    381 	int proto;
    382 	struct icmp *icp;
    383 	struct ip *ip = mtod(m, struct ip *);
    384 	int icmplen;
    385 	int i;
    386 	struct in_ifaddr *ia;
    387 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    388 	int code;
    389 	int hlen;
    390 	va_list ap;
    391 	struct rtentry *rt;
    392 
    393 	va_start(ap, m);
    394 	hlen = va_arg(ap, int);
    395 	proto = va_arg(ap, int);
    396 	va_end(ap);
    397 
    398 	/*
    399 	 * Locate icmp structure in mbuf, and check
    400 	 * that not corrupted and of at least minimum length.
    401 	 */
    402 	icmplen = ip->ip_len - hlen;
    403 #ifdef ICMPPRINTFS
    404 	if (icmpprintfs)
    405 		printf("icmp_input from %x to %x, len %d\n",
    406 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    407 		    icmplen);
    408 #endif
    409 	if (icmplen < ICMP_MINLEN) {
    410 		icmpstat.icps_tooshort++;
    411 		goto freeit;
    412 	}
    413 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    414 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    415 		icmpstat.icps_tooshort++;
    416 		return;
    417 	}
    418 	ip = mtod(m, struct ip *);
    419 	m->m_len -= hlen;
    420 	m->m_data += hlen;
    421 	icp = mtod(m, struct icmp *);
    422 	if (in_cksum(m, icmplen)) {
    423 		icmpstat.icps_checksum++;
    424 		goto freeit;
    425 	}
    426 	m->m_len += hlen;
    427 	m->m_data -= hlen;
    428 
    429 #ifdef ICMPPRINTFS
    430 	/*
    431 	 * Message type specific processing.
    432 	 */
    433 	if (icmpprintfs)
    434 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    435 		    icp->icmp_code);
    436 #endif
    437 	if (icp->icmp_type > ICMP_MAXTYPE)
    438 		goto raw;
    439 	icmpstat.icps_inhist[icp->icmp_type]++;
    440 	code = icp->icmp_code;
    441 	switch (icp->icmp_type) {
    442 
    443 	case ICMP_UNREACH:
    444 		switch (code) {
    445 			case ICMP_UNREACH_NET:
    446 			case ICMP_UNREACH_HOST:
    447 			case ICMP_UNREACH_PROTOCOL:
    448 			case ICMP_UNREACH_PORT:
    449 			case ICMP_UNREACH_SRCFAIL:
    450 				code += PRC_UNREACH_NET;
    451 				break;
    452 
    453 			case ICMP_UNREACH_NEEDFRAG:
    454 				code = PRC_MSGSIZE;
    455 				break;
    456 
    457 			case ICMP_UNREACH_NET_UNKNOWN:
    458 			case ICMP_UNREACH_NET_PROHIB:
    459 			case ICMP_UNREACH_TOSNET:
    460 				code = PRC_UNREACH_NET;
    461 				break;
    462 
    463 			case ICMP_UNREACH_HOST_UNKNOWN:
    464 			case ICMP_UNREACH_ISOLATED:
    465 			case ICMP_UNREACH_HOST_PROHIB:
    466 			case ICMP_UNREACH_TOSHOST:
    467 				code = PRC_UNREACH_HOST;
    468 				break;
    469 
    470 			default:
    471 				goto badcode;
    472 		}
    473 		goto deliver;
    474 
    475 	case ICMP_TIMXCEED:
    476 		if (code > 1)
    477 			goto badcode;
    478 		code += PRC_TIMXCEED_INTRANS;
    479 		goto deliver;
    480 
    481 	case ICMP_PARAMPROB:
    482 		if (code > 1)
    483 			goto badcode;
    484 		code = PRC_PARAMPROB;
    485 		goto deliver;
    486 
    487 	case ICMP_SOURCEQUENCH:
    488 		if (code)
    489 			goto badcode;
    490 		code = PRC_QUENCH;
    491 		goto deliver;
    492 
    493 	deliver:
    494 		/*
    495 		 * Problem with datagram; advise higher level routines.
    496 		 */
    497 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    498 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    499 			icmpstat.icps_badlen++;
    500 			goto freeit;
    501 		}
    502 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    503 			goto badcode;
    504 		NTOHS(icp->icmp_ip.ip_len);
    505 #ifdef ICMPPRINTFS
    506 		if (icmpprintfs)
    507 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    508 #endif
    509 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    510 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    511 		if (ctlfunc)
    512 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
    513 			    &icp->icmp_ip);
    514 		break;
    515 
    516 	badcode:
    517 		icmpstat.icps_badcode++;
    518 		break;
    519 
    520 	case ICMP_ECHO:
    521 		icp->icmp_type = ICMP_ECHOREPLY;
    522 		goto reflect;
    523 
    524 	case ICMP_TSTAMP:
    525 		if (icmplen < ICMP_TSLEN) {
    526 			icmpstat.icps_badlen++;
    527 			break;
    528 		}
    529 		icp->icmp_type = ICMP_TSTAMPREPLY;
    530 		icp->icmp_rtime = iptime();
    531 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    532 		goto reflect;
    533 
    534 	case ICMP_MASKREQ:
    535 		if (icmpmaskrepl == 0)
    536 			break;
    537 		/*
    538 		 * We are not able to respond with all ones broadcast
    539 		 * unless we receive it over a point-to-point interface.
    540 		 */
    541 		if (icmplen < ICMP_MASKLEN) {
    542 			icmpstat.icps_badlen++;
    543 			break;
    544 		}
    545 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    546 		    in_nullhost(ip->ip_dst))
    547 			icmpdst.sin_addr = ip->ip_src;
    548 		else
    549 			icmpdst.sin_addr = ip->ip_dst;
    550 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    551 		    m->m_pkthdr.rcvif));
    552 		if (ia == 0)
    553 			break;
    554 		icp->icmp_type = ICMP_MASKREPLY;
    555 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    556 		if (in_nullhost(ip->ip_src)) {
    557 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    558 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    559 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    560 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    561 		}
    562 reflect:
    563 		icmpstat.icps_reflect++;
    564 		icmpstat.icps_outhist[icp->icmp_type]++;
    565 		icmp_reflect(m);
    566 		return;
    567 
    568 	case ICMP_REDIRECT:
    569 		if (code > 3)
    570 			goto badcode;
    571 		if (icmp_rediraccept == 0)
    572 			goto freeit;
    573 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    574 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    575 			icmpstat.icps_badlen++;
    576 			break;
    577 		}
    578 		/*
    579 		 * Short circuit routing redirects to force
    580 		 * immediate change in the kernel's routing
    581 		 * tables.  The message is also handed to anyone
    582 		 * listening on a raw socket (e.g. the routing
    583 		 * daemon for use in updating its tables).
    584 		 */
    585 		icmpgw.sin_addr = ip->ip_src;
    586 		icmpdst.sin_addr = icp->icmp_gwaddr;
    587 #ifdef	ICMPPRINTFS
    588 		if (icmpprintfs)
    589 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    590 			    icp->icmp_gwaddr);
    591 #endif
    592 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    593 		rt = NULL;
    594 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    595 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    596 		    sintosa(&icmpgw), (struct rtentry **)&rt);
    597 		if (rt != NULL && icmp_redirtimeout != 0) {
    598 			i = rt_timer_add(rt, icmp_redirect_timeout,
    599 					 icmp_redirect_timeout_q);
    600 			if (i)
    601 				log(LOG_ERR, "ICMP:  redirect failed to "
    602 				    "register timeout for route to %x, "
    603 				    "code %d\n",
    604 				    icp->icmp_ip.ip_dst.s_addr, i);
    605 		}
    606 		if (rt != NULL)
    607 			rtfree(rt);
    608 
    609 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    610 #ifdef IPSEC
    611 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    612 #endif
    613 		break;
    614 
    615 	/*
    616 	 * No kernel processing for the following;
    617 	 * just fall through to send to raw listener.
    618 	 */
    619 	case ICMP_ECHOREPLY:
    620 	case ICMP_ROUTERADVERT:
    621 	case ICMP_ROUTERSOLICIT:
    622 	case ICMP_TSTAMPREPLY:
    623 	case ICMP_IREQREPLY:
    624 	case ICMP_MASKREPLY:
    625 	default:
    626 		break;
    627 	}
    628 
    629 raw:
    630 	rip_input(m, hlen, proto);
    631 	return;
    632 
    633 freeit:
    634 	m_freem(m);
    635 	return;
    636 }
    637 
    638 /*
    639  * Reflect the ip packet back to the source
    640  */
    641 void
    642 icmp_reflect(m)
    643 	struct mbuf *m;
    644 {
    645 	struct ip *ip = mtod(m, struct ip *);
    646 	struct in_ifaddr *ia;
    647 	struct ifaddr *ifa;
    648 	struct sockaddr_in *sin = 0;
    649 	struct in_addr t;
    650 	struct mbuf *opts = 0;
    651 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    652 
    653 	if (!in_canforward(ip->ip_src) &&
    654 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    655 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    656 		m_freem(m);	/* Bad return address */
    657 		goto done;	/* ip_output() will check for broadcast */
    658 	}
    659 	t = ip->ip_dst;
    660 	ip->ip_dst = ip->ip_src;
    661 	/*
    662 	 * If the incoming packet was addressed directly to us, use
    663 	 * dst as the src for the reply.  Otherwise (broadcast or
    664 	 * anonymous), use an address which corresponds to the
    665 	 * incoming interface, with a preference for the address which
    666 	 * corresponds to the route to the destination of the ICMP.
    667 	 */
    668 
    669 	/* Look for packet addressed to us */
    670 	INADDR_TO_IA(t, ia);
    671 
    672 	/* look for packet sent to broadcast address */
    673 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    674 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    675 			if (ifa->ifa_addr->sa_family != AF_INET)
    676 				continue;
    677 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    678 				ia = ifatoia(ifa);
    679 				break;
    680 			}
    681 		}
    682 	}
    683 
    684 	if (ia)
    685 		sin = &ia->ia_addr;
    686 
    687 	icmpdst.sin_addr = t;
    688 
    689 	/* if the packet is addressed somewhere else, compute the
    690 	   source address for packets routed back to the source, and
    691 	   use that, if it's an address on the interface which
    692 	   received the packet */
    693 	if (sin == (struct sockaddr_in *)0) {
    694 		struct sockaddr_in sin_dst;
    695 		struct route icmproute;
    696 		int errornum;
    697 
    698 		sin_dst.sin_family = AF_INET;
    699 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    700 		sin_dst.sin_addr = ip->ip_dst;
    701 		bzero(&icmproute, sizeof(icmproute));
    702 		errornum = 0;
    703 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    704 		/* errornum is never used */
    705 		if (icmproute.ro_rt)
    706 			RTFREE(icmproute.ro_rt);
    707 		/* check to make sure sin is a source address on rcvif */
    708 		if (sin) {
    709 			t = sin->sin_addr;
    710 			sin = (struct sockaddr_in *)0;
    711 			INADDR_TO_IA(t, ia);
    712 			while (ia) {
    713 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    714 					sin = &ia->ia_addr;
    715 					break;
    716 				}
    717 				NEXT_IA_WITH_SAME_ADDR(ia);
    718 			}
    719 		}
    720 	}
    721 
    722 	/* if it was not addressed to us, but the route doesn't go out
    723 	   the source interface, pick an address on the source
    724 	   interface.  This can happen when routing is asymmetric, or
    725 	   when the incoming packet was encapsulated */
    726 	if (sin == (struct sockaddr_in *)0) {
    727 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    728 			if (ifa->ifa_addr->sa_family != AF_INET)
    729 				continue;
    730 			sin = &(ifatoia(ifa)->ia_addr);
    731 			break;
    732 		}
    733 	}
    734 
    735 	/*
    736 	 * The following happens if the packet was not addressed to us,
    737 	 * and was received on an interface with no IP address:
    738 	 * We find the first AF_INET address on the first non-loopback
    739 	 * interface.
    740 	 */
    741 	if (sin == (struct sockaddr_in *)0)
    742 		TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
    743 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    744 				continue;
    745 			sin = &ia->ia_addr;
    746 			break;
    747 		}
    748 
    749 	/*
    750 	 * If we still didn't find an address, punt.  We could have an
    751 	 * interface up (and receiving packets) with no address.
    752 	 */
    753 	if (sin == (struct sockaddr_in *)0) {
    754 		m_freem(m);
    755 		goto done;
    756 	}
    757 
    758 	ip->ip_src = sin->sin_addr;
    759 	ip->ip_ttl = MAXTTL;
    760 
    761 	if (optlen > 0) {
    762 		u_char *cp;
    763 		int opt, cnt;
    764 		u_int len;
    765 
    766 		/*
    767 		 * Retrieve any source routing from the incoming packet;
    768 		 * add on any record-route or timestamp options.
    769 		 */
    770 		cp = (u_char *) (ip + 1);
    771 		if ((opts = ip_srcroute()) == 0 &&
    772 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    773 			opts->m_len = sizeof(struct in_addr);
    774 			*mtod(opts, struct in_addr *) = zeroin_addr;
    775 		}
    776 		if (opts) {
    777 #ifdef ICMPPRINTFS
    778 		    if (icmpprintfs)
    779 			    printf("icmp_reflect optlen %d rt %d => ",
    780 				optlen, opts->m_len);
    781 #endif
    782 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    783 			    opt = cp[IPOPT_OPTVAL];
    784 			    if (opt == IPOPT_EOL)
    785 				    break;
    786 			    if (opt == IPOPT_NOP)
    787 				    len = 1;
    788 			    else {
    789 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    790 					    break;
    791 				    len = cp[IPOPT_OLEN];
    792 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    793 				        len > cnt)
    794 					    break;
    795 			    }
    796 			    /*
    797 			     * Should check for overflow, but it "can't happen"
    798 			     */
    799 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    800 				opt == IPOPT_SECURITY) {
    801 				    bcopy((caddr_t)cp,
    802 					mtod(opts, caddr_t) + opts->m_len, len);
    803 				    opts->m_len += len;
    804 			    }
    805 		    }
    806 		    /* Terminate & pad, if necessary */
    807 		    if ((cnt = opts->m_len % 4) != 0) {
    808 			    for (; cnt < 4; cnt++) {
    809 				    *(mtod(opts, caddr_t) + opts->m_len) =
    810 					IPOPT_EOL;
    811 				    opts->m_len++;
    812 			    }
    813 		    }
    814 #ifdef ICMPPRINTFS
    815 		    if (icmpprintfs)
    816 			    printf("%d\n", opts->m_len);
    817 #endif
    818 		}
    819 		/*
    820 		 * Now strip out original options by copying rest of first
    821 		 * mbuf's data back, and adjust the IP length.
    822 		 */
    823 		ip->ip_len -= optlen;
    824 		ip->ip_hl = sizeof(struct ip) >> 2;
    825 		m->m_len -= optlen;
    826 		if (m->m_flags & M_PKTHDR)
    827 			m->m_pkthdr.len -= optlen;
    828 		optlen += sizeof(struct ip);
    829 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    830 			 (unsigned)(m->m_len - sizeof(struct ip)));
    831 	}
    832 	m->m_flags &= ~(M_BCAST|M_MCAST);
    833 	icmp_send(m, opts);
    834 done:
    835 	if (opts)
    836 		(void)m_free(opts);
    837 }
    838 
    839 /*
    840  * Send an icmp packet back to the ip level,
    841  * after supplying a checksum.
    842  */
    843 void
    844 icmp_send(m, opts)
    845 	struct mbuf *m;
    846 	struct mbuf *opts;
    847 {
    848 	struct ip *ip = mtod(m, struct ip *);
    849 	int hlen;
    850 	struct icmp *icp;
    851 
    852 	hlen = ip->ip_hl << 2;
    853 	m->m_data += hlen;
    854 	m->m_len -= hlen;
    855 	icp = mtod(m, struct icmp *);
    856 	icp->icmp_cksum = 0;
    857 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    858 	m->m_data -= hlen;
    859 	m->m_len += hlen;
    860 #ifdef ICMPPRINTFS
    861 	if (icmpprintfs)
    862 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    863 #endif
    864 #ifdef IPSEC
    865 	/* Don't lookup socket */
    866 	(void)ipsec_setsocket(m, NULL);
    867 #endif
    868 	(void) ip_output(m, opts, NULL, 0, NULL);
    869 }
    870 
    871 n_time
    872 iptime()
    873 {
    874 	struct timeval atv;
    875 	u_long t;
    876 
    877 	microtime(&atv);
    878 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    879 	return (htonl(t));
    880 }
    881 
    882 int
    883 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    884 	int *name;
    885 	u_int namelen;
    886 	void *oldp;
    887 	size_t *oldlenp;
    888 	void *newp;
    889 	size_t newlen;
    890 {
    891 	int arg, error;
    892 
    893 	/* All sysctl names at this level are terminal. */
    894 	if (namelen != 1)
    895 		return (ENOTDIR);
    896 
    897 	switch (name[0])
    898 	{
    899 	case ICMPCTL_MASKREPL:
    900 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    901 		break;
    902 	case ICMPCTL_RETURNDATABYTES:
    903 		arg = icmpreturndatabytes;
    904 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    905 		if (error)
    906 			break;
    907 		if ((arg >= 8) || (arg <= 512))
    908 			icmpreturndatabytes = arg;
    909 		else
    910 			error = EINVAL;
    911 		break;
    912 	case ICMPCTL_ERRPPSLIMIT:
    913 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    914 		break;
    915 	case ICMPCTL_REDIRACCEPT:
    916 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    917 				   &icmp_rediraccept);
    918 		break;
    919 	case ICMPCTL_REDIRTIMEOUT:
    920 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    921 				   &icmp_redirtimeout);
    922 		if (icmp_redirect_timeout_q != NULL) {
    923 			if (icmp_redirtimeout == 0) {
    924 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
    925 						       TRUE);
    926 				icmp_redirect_timeout_q = NULL;
    927 			} else {
    928 				rt_timer_queue_change(icmp_redirect_timeout_q,
    929 						      icmp_redirtimeout);
    930 			}
    931 		} else if (icmp_redirtimeout > 0) {
    932 			icmp_redirect_timeout_q =
    933 				rt_timer_queue_create(icmp_redirtimeout);
    934 		}
    935 		return (error);
    936 
    937 		break;
    938 	default:
    939 		error = ENOPROTOOPT;
    940 		break;
    941 	}
    942 	return error;
    943 }
    944 
    945 /* Table of common MTUs: */
    946 
    947 static const u_int mtu_table[] = {
    948 	65535, 65280, 32000, 17914, 9180, 8166,
    949 	4352, 2002, 1492, 1006, 508, 296, 68, 0
    950 };
    951 
    952 void
    953 icmp_mtudisc(icp, faddr)
    954 	struct icmp *icp;
    955 	struct in_addr faddr;
    956 {
    957 	struct icmp_mtudisc_callback *mc;
    958 	struct sockaddr *dst = sintosa(&icmpsrc);
    959 	struct rtentry *rt;
    960 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    961 	int    error;
    962 
    963 	rt = rtalloc1(dst, 1);
    964 	if (rt == 0)
    965 		return;
    966 
    967 	/* If we didn't get a host route, allocate one */
    968 
    969 	if ((rt->rt_flags & RTF_HOST) == 0) {
    970 		struct rtentry *nrt;
    971 
    972 		error = rtrequest((int) RTM_ADD, dst,
    973 		    (struct sockaddr *) rt->rt_gateway,
    974 		    (struct sockaddr *) 0,
    975 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    976 		if (error) {
    977 			rtfree(rt);
    978 			return;
    979 		}
    980 		nrt->rt_rmx = rt->rt_rmx;
    981 		rtfree(rt);
    982 		rt = nrt;
    983 	}
    984 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    985 	if (error) {
    986 		rtfree(rt);
    987 		return;
    988 	}
    989 
    990 	if (mtu == 0) {
    991 		int i = 0;
    992 
    993 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    994 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    995 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    996 			mtu -= (icp->icmp_ip.ip_hl << 2);
    997 
    998 		/* If we still can't guess a value, try the route */
    999 
   1000 		if (mtu == 0) {
   1001 			mtu = rt->rt_rmx.rmx_mtu;
   1002 
   1003 			/* If no route mtu, default to the interface mtu */
   1004 
   1005 			if (mtu == 0)
   1006 				mtu = rt->rt_ifp->if_mtu;
   1007 		}
   1008 
   1009 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
   1010 			if (mtu > mtu_table[i]) {
   1011 				mtu = mtu_table[i];
   1012 				break;
   1013 			}
   1014 	}
   1015 
   1016 	/*
   1017 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
   1018 	 *	  to turn off PMTU for a route, and the kernel can
   1019 	 *	  set it to indicate a serious problem with PMTU
   1020 	 *	  on a route.  We should be using a separate flag
   1021 	 *	  for the kernel to indicate this.
   1022 	 */
   1023 
   1024 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1025 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
   1026 			rt->rt_rmx.rmx_locks |= RTV_MTU;
   1027 		else if (rt->rt_rmx.rmx_mtu > mtu ||
   1028 			 rt->rt_rmx.rmx_mtu == 0) {
   1029 			icmpstat.icps_pmtuchg++;
   1030 			rt->rt_rmx.rmx_mtu = mtu;
   1031 		}
   1032 	}
   1033 
   1034 	if (rt)
   1035 		rtfree(rt);
   1036 
   1037 	/*
   1038 	 * Notify protocols that the MTU for this destination
   1039 	 * has changed.
   1040 	 */
   1041 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
   1042 	     mc = LIST_NEXT(mc, mc_list))
   1043 		(*mc->mc_func)(faddr);
   1044 }
   1045 
   1046 /*
   1047  * Return the next larger or smaller MTU plateau (table from RFC 1191)
   1048  * given current value MTU.  If DIR is less than zero, a larger plateau
   1049  * is returned; otherwise, a smaller value is returned.
   1050  */
   1051 int
   1052 ip_next_mtu(mtu, dir)	/* XXX */
   1053 	int mtu;
   1054 	int dir;
   1055 {
   1056 	int i;
   1057 
   1058 	for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
   1059 		if (mtu >= mtu_table[i])
   1060 			break;
   1061 	}
   1062 
   1063 	if (dir < 0) {
   1064 		if (i == 0) {
   1065 			return 0;
   1066 		} else {
   1067 			return mtu_table[i - 1];
   1068 		}
   1069 	} else {
   1070 		if (mtu_table[i] == 0) {
   1071 			return 0;
   1072 		} else if (mtu > mtu_table[i]) {
   1073 			return mtu_table[i];
   1074 		} else {
   1075 			return mtu_table[i + 1];
   1076 		}
   1077 	}
   1078 }
   1079 
   1080 static void
   1081 icmp_mtudisc_timeout(rt, r)
   1082 	struct rtentry *rt;
   1083 	struct rttimer *r;
   1084 {
   1085 	if (rt == NULL)
   1086 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1087 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1088 	    (RTF_DYNAMIC | RTF_HOST)) {
   1089 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1090 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1091 	} else {
   1092 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1093 			rt->rt_rmx.rmx_mtu = 0;
   1094 		}
   1095 	}
   1096 }
   1097 
   1098 static void
   1099 icmp_redirect_timeout(rt, r)
   1100 	struct rtentry *rt;
   1101 	struct rttimer *r;
   1102 {
   1103 	if (rt == NULL)
   1104 		panic("icmp_redirect_timeout:  bad route to timeout");
   1105 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1106 	    (RTF_DYNAMIC | RTF_HOST)) {
   1107 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1108 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1109 	}
   1110 }
   1111 
   1112 /*
   1113  * Perform rate limit check.
   1114  * Returns 0 if it is okay to send the icmp packet.
   1115  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1116  * limitation.
   1117  *
   1118  * XXX per-destination/type check necessary?
   1119  */
   1120 static int
   1121 icmp_ratelimit(dst, type, code)
   1122 	const struct in_addr *dst;
   1123 	const int type;			/* not used at this moment */
   1124 	const int code;			/* not used at this moment */
   1125 {
   1126 
   1127 	/* PPS limit */
   1128 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1129 	    icmperrppslim)) {
   1130 		/* The packet is subject to rate limit */
   1131 		return 1;
   1132 	}
   1133 
   1134 	/*okay to send*/
   1135 	return 0;
   1136 }
   1137