Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.65
      1 /*	$NetBSD: ip_icmp.c,v 1.65 2001/11/04 20:55:27 matt Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include "opt_ipsec.h"
    108 
    109 #include <sys/param.h>
    110 #include <sys/systm.h>
    111 #include <sys/malloc.h>
    112 #include <sys/mbuf.h>
    113 #include <sys/protosw.h>
    114 #include <sys/socket.h>
    115 #include <sys/time.h>
    116 #include <sys/kernel.h>
    117 #include <sys/syslog.h>
    118 #include <sys/sysctl.h>
    119 
    120 #include <net/if.h>
    121 #include <net/route.h>
    122 
    123 #include <netinet/in.h>
    124 #include <netinet/in_systm.h>
    125 #include <netinet/in_var.h>
    126 #include <netinet/ip.h>
    127 #include <netinet/ip_icmp.h>
    128 #include <netinet/ip_var.h>
    129 #include <netinet/in_pcb.h>
    130 #include <netinet/icmp_var.h>
    131 
    132 #ifdef IPSEC
    133 #include <netinet6/ipsec.h>
    134 #include <netkey/key.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing,
    141  * routines to turn packets around and send them back to the
    142  * originator, and host table maintenance routines.
    143  */
    144 
    145 int	icmpmaskrepl = 0;	/* nonzero: icmp_input() answers ICMP_MASKREQ */
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;	/* nonzero: emit the debugging printf()s in this file */
    148 #endif
    149 int	icmpreturndatabytes = 8;	/* max bytes quoted after the IP header in an error */
    150 
    151 /*
    152  * List of callbacks to notify when Path MTU changes are made.
         * Entries are added by icmp_mtudisc_callback_register(), which
         * keeps at most one entry per function pointer.
    153  */
    154 struct icmp_mtudisc_callback {
    155 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;	/* list linkage */
    156 	void (*mc_func) __P((struct in_addr));		/* function to call */
    157 };
    158 
    159 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    160     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
    161 
    162 #if 0
    163 static int	ip_next_mtu __P((int, int));
    164 #else
    165 /*static*/ int	ip_next_mtu __P((int, int));	/* currently has external linkage */
    166 #endif
    167 
    168 extern int icmperrppslim;	/* defined elsewhere; adjustable via ICMPCTL_ERRPPSLIMIT */
    169 static int icmperrpps_count = 0;	/* presumably rate-limit state for icmp_ratelimit() -- confirm */
    170 static struct timeval icmperrppslim_last;	/* presumably rate-limit timestamp -- confirm */
    171 static int icmp_rediraccept = 1;	/* zero: icmp_input() discards ICMP_REDIRECT */
    172 static int icmp_redirtimeout = 0;	/* nonzero: redirect-created routes get a timer */
    173 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;	/* NULL until a nonzero timeout is configured */
    174 
    175 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    176 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
    177 
    178 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));	/* nonzero result suppresses the error in icmp_error() */
    179 
    180 
    181 void
    182 icmp_init()
    183 {
    184 	/*
    185 	 * This is only useful if the user initializes redirtimeout to
    186 	 * something other than zero.
    187 	 */
    188 	if (icmp_redirtimeout != 0) {
    189 		icmp_redirect_timeout_q =
    190 			rt_timer_queue_create(icmp_redirtimeout);
    191 	}
    192 }
    193 
    194 /*
    195  * Register a Path MTU Discovery callback.
    196  */
    197 void
    198 icmp_mtudisc_callback_register(func)
    199 	void (*func) __P((struct in_addr));
    200 {
    201 	struct icmp_mtudisc_callback *mc;
    202 
    203 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    204 	     mc = LIST_NEXT(mc, mc_list)) {
    205 		if (mc->mc_func == func)
    206 			return;
    207 	}
    208 
    209 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    210 	if (mc == NULL)
    211 		panic("icmp_mtudisc_callback_register");
    212 
    213 	mc->mc_func = func;
    214 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    215 }
    216 
    217 /*
    218  * Generate an error packet of type error
    219  * in response to bad packet ip.
    220  */
    221 void
    222 icmp_error(n, type, code, dest, destifp)
    223 	struct mbuf *n;
    224 	int type, code;
    225 	n_long dest;
    226 	struct ifnet *destifp;
    227 {
    228 	struct ip *oip = mtod(n, struct ip *), *nip;
    229 	unsigned oiplen = oip->ip_hl << 2;
    230 	struct icmp *icp;
    231 	struct mbuf *m;
    232 	unsigned icmplen, mblen;
    233 
    234 #ifdef ICMPPRINTFS
    235 	if (icmpprintfs)
    236 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    237 #endif
    238 	if (type != ICMP_REDIRECT)
    239 		icmpstat.icps_error++;
    240 	/*
    241 	 * Don't send error if the original packet was encrypted.
    242 	 * Don't send error if not the first fragment of message.
    243 	 * Don't error if the old packet protocol was ICMP
    244 	 * error message, only known informational types.
    245 	 */
    246 	if (n->m_flags & M_DECRYPTED)
    247 		goto freeit;
    248 	if (oip->ip_off &~ (IP_MF|IP_DF))
    249 		goto freeit;
    250 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    251 	  n->m_len >= oiplen + ICMP_MINLEN &&
    252 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    253 		icmpstat.icps_oldicmp++;
    254 		goto freeit;
    255 	}
    256 	/* Don't send error in response to a multicast or broadcast packet */
    257 	if (n->m_flags & (M_BCAST|M_MCAST))
    258 		goto freeit;
    259 
    260 	/*
    261 	 * First, do a rate limitation check.
    262 	 */
    263 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    264 		/* XXX stat */
    265 		goto freeit;
    266 	}
    267 
    268 	/*
    269 	 * Now, formulate icmp message
    270 	 */
    271 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    272 	/*
    273 	 * Defend against mbuf chains shorter than oip->ip_len:
    274 	 */
    275 	mblen = 0;
    276 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    277 		mblen += m->m_len;
    278 	icmplen = min(mblen, icmplen);
    279 
    280 	/*
    281 	 * As we are not required to return everything we have,
    282 	 * we return whatever we can return at ease.
    283 	 *
    284 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    285 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    286 	 * icmp_sysctl will keep things below that limit.
    287 	 */
    288 
    289 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    290 
    291 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    292 		icmplen = MCLBYTES - ICMP_MINLEN;
    293 
    294 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    295 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    296 		MCLGET(m, M_DONTWAIT);
    297 		if ((m->m_flags & M_EXT) == 0) {
    298 			m_freem(m);
    299 			m = NULL;
    300 		}
    301 	}
    302 	if (m == NULL)
    303 		goto freeit;
    304 	m->m_len = icmplen + ICMP_MINLEN;
    305 	if ((m->m_flags & M_EXT) == 0)
    306 		MH_ALIGN(m, m->m_len);
    307 	icp = mtod(m, struct icmp *);
    308 	if ((u_int)type > ICMP_MAXTYPE)
    309 		panic("icmp_error");
    310 	icmpstat.icps_outhist[type]++;
    311 	icp->icmp_type = type;
    312 	if (type == ICMP_REDIRECT)
    313 		icp->icmp_gwaddr.s_addr = dest;
    314 	else {
    315 		icp->icmp_void = 0;
    316 		/*
    317 		 * The following assignments assume an overlay with the
    318 		 * zeroed icmp_void field.
    319 		 */
    320 		if (type == ICMP_PARAMPROB) {
    321 			icp->icmp_pptr = code;
    322 			code = 0;
    323 		} else if (type == ICMP_UNREACH &&
    324 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    325 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    326 	}
    327 
    328 	HTONS(oip->ip_off);
    329 	HTONS(oip->ip_len);
    330 	icp->icmp_code = code;
    331 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    332 	nip = &icp->icmp_ip;
    333 
    334 	/*
    335 	 * Now, copy old ip header (without options)
    336 	 * in front of icmp message.
    337 	 */
    338 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    339 		panic("icmp len");
    340 	m->m_data -= sizeof(struct ip);
    341 	m->m_len += sizeof(struct ip);
    342 	m->m_pkthdr.len = m->m_len;
    343 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    344 	nip = mtod(m, struct ip *);
    345 	/* ip_v set in ip_output */
    346 	nip->ip_hl = sizeof(struct ip) >> 2;
    347 	nip->ip_tos = 0;
    348 	nip->ip_len = m->m_len;
    349 	/* ip_id set in ip_output */
    350 	nip->ip_off = 0;
    351 	/* ip_ttl set in icmp_reflect */
    352 	nip->ip_p = IPPROTO_ICMP;
    353 	nip->ip_src = oip->ip_src;
    354 	nip->ip_dst = oip->ip_dst;
    355 	icmp_reflect(m);
    356 
    357 freeit:
    358 	m_freem(n);
    359 }
    360 
    361 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: destination of the packet quoted in an ICMP message */
    362 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: interface-lookup address / redirect's new gateway */
    363 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: sender of a received redirect */
    364 struct sockaddr_in icmpmask = { 8, 0 };	/* same leading fields as above: length 8, family 0; not used in the code shown here */
    365 
    366 /*
    367  * Process a received ICMP message.
         *
         * The variable arguments are two ints, read in this order: the IP
         * header length (hlen) and the protocol number (proto).
         *
         * The message is dropped (and m freed) when it is shorter than
         * ICMP_MINLEN or its checksum over icmplen bytes is bad.  Otherwise:
         *  - UNREACH, TIMXCEED, PARAMPROB and SOURCEQUENCH are translated to
         *    a PRC_* code and given to the pr_ctlinput handler of the
         *    protocol named in the quoted IP header;
         *  - ECHO, TSTAMP and (if icmpmaskrepl is set) MASKREQ are rewritten
         *    into their reply type in place and sent via icmp_reflect();
         *  - REDIRECT updates the routing table through rtredirect();
         *  - everything that is not reflected or freed is finally handed to
         *    rip_input() for raw listeners.
    368  */
    369 void
    370 #if __STDC__
    371 icmp_input(struct mbuf *m, ...)
    372 #else
    373 icmp_input(m, va_alist)
    374 	struct mbuf *m;
    375 	va_dcl
    376 #endif
    377 {
    378 	int proto;
    379 	struct icmp *icp;
    380 	struct ip *ip = mtod(m, struct ip *);
    381 	int icmplen;
    382 	int i;
    383 	struct in_ifaddr *ia;
    384 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    385 	int code;
    386 	int hlen;
    387 	va_list ap;
    388 	struct rtentry *rt;
    389 
    390 	va_start(ap, m);
    391 	hlen = va_arg(ap, int);
    392 	proto = va_arg(ap, int);
    393 	va_end(ap);
    394 
    395 	/*
    396 	 * Locate icmp structure in mbuf, and check
    397 	 * that not corrupted and of at least minimum length.
    398 	 */
    399 	icmplen = ip->ip_len - hlen;
    400 #ifdef ICMPPRINTFS
    401 	if (icmpprintfs)
    402 		printf("icmp_input from %x to %x, len %d\n",
    403 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    404 		    icmplen);
    405 #endif
    406 	if (icmplen < ICMP_MINLEN) {
    407 		icmpstat.icps_tooshort++;
    408 		goto freeit;
    409 	}
    410 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    411 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    412 		icmpstat.icps_tooshort++;
    413 		return;	/* m is now the null result of m_pullup(); nothing to free */
    414 	}
    415 	ip = mtod(m, struct ip *);	/* reload: m_pullup() may have returned a different mbuf */
    416 	m->m_len -= hlen;	/* hide the IP header so in_cksum() covers only ICMP */
    417 	m->m_data += hlen;
    418 	icp = mtod(m, struct icmp *);
    419 	if (in_cksum(m, icmplen)) {
    420 		icmpstat.icps_checksum++;
    421 		goto freeit;
    422 	}
    423 	m->m_len += hlen;	/* put the IP header back in view */
    424 	m->m_data -= hlen;
    425 
    426 #ifdef ICMPPRINTFS
    427 	/*
    428 	 * Message type specific processing.
    429 	 */
    430 	if (icmpprintfs)
    431 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    432 		    icp->icmp_code);
    433 #endif
    434 	if (icp->icmp_type > ICMP_MAXTYPE)
    435 		goto raw;	/* unknown type: not counted in icps_inhist */
    436 	icmpstat.icps_inhist[icp->icmp_type]++;
    437 	code = icp->icmp_code;
    438 	switch (icp->icmp_type) {
    439 
    440 	case ICMP_UNREACH:
    441 		switch (code) {
    442 			case ICMP_UNREACH_NET:
    443 			case ICMP_UNREACH_HOST:
    444 			case ICMP_UNREACH_PROTOCOL:
    445 			case ICMP_UNREACH_PORT:
    446 			case ICMP_UNREACH_SRCFAIL:
    447 				code += PRC_UNREACH_NET;
    448 				break;
    449 
    450 			case ICMP_UNREACH_NEEDFRAG:
    451 				code = PRC_MSGSIZE;
    452 				break;
    453 
    454 			case ICMP_UNREACH_NET_UNKNOWN:
    455 			case ICMP_UNREACH_NET_PROHIB:
    456 			case ICMP_UNREACH_TOSNET:
    457 				code = PRC_UNREACH_NET;
    458 				break;
    459 
    460 			case ICMP_UNREACH_HOST_UNKNOWN:
    461 			case ICMP_UNREACH_ISOLATED:
    462 			case ICMP_UNREACH_HOST_PROHIB:
    463 			case ICMP_UNREACH_TOSHOST:
    464 				code = PRC_UNREACH_HOST;
    465 				break;
    466 
    467 			default:
    468 				goto badcode;
    469 		}
    470 		goto deliver;
    471 
    472 	case ICMP_TIMXCEED:
    473 		if (code > 1)
    474 			goto badcode;
    475 		code += PRC_TIMXCEED_INTRANS;
    476 		goto deliver;
    477 
    478 	case ICMP_PARAMPROB:
    479 		if (code > 1)
    480 			goto badcode;
    481 		code = PRC_PARAMPROB;
    482 		goto deliver;
    483 
    484 	case ICMP_SOURCEQUENCH:
    485 		if (code)
    486 			goto badcode;
    487 		code = PRC_QUENCH;
    488 		goto deliver;
    489 
    490 	deliver:	/* shared tail of the four error types above; code now holds a PRC_* value */
    491 		/*
    492 		 * Problem with datagram; advise higher level routines.
    493 		 */
    494 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    495 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    496 			icmpstat.icps_badlen++;
    497 			goto freeit;
    498 		}
    499 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    500 			goto badcode;
    501 		NTOHS(icp->icmp_ip.ip_len);
    502 #ifdef ICMPPRINTFS
    503 		if (icmpprintfs)
    504 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    505 #endif
    506 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    507 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    508 		if (ctlfunc)
    509 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
    510 			    &icp->icmp_ip);
    511 		break;
    512 
    513 	badcode:	/* counted, then still passed on to raw listeners below */
    514 		icmpstat.icps_badcode++;
    515 		break;
    516 
    517 	case ICMP_ECHO:
    518 		icp->icmp_type = ICMP_ECHOREPLY;
    519 		goto reflect;
    520 
    521 	case ICMP_TSTAMP:
    522 		if (icmplen < ICMP_TSLEN) {
    523 			icmpstat.icps_badlen++;
    524 			break;
    525 		}
    526 		icp->icmp_type = ICMP_TSTAMPREPLY;
    527 		icp->icmp_rtime = iptime();
    528 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    529 		goto reflect;
    530 
    531 	case ICMP_MASKREQ:
    532 		if (icmpmaskrepl == 0)
    533 			break;
    534 		/*
    535 		 * We are not able to respond with all ones broadcast
    536 		 * unless we receive it over a point-to-point interface.
    537 		 */
    538 		if (icmplen < ICMP_MASKLEN) {
    539 			icmpstat.icps_badlen++;
    540 			break;
    541 		}
    542 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    543 		    in_nullhost(ip->ip_dst))
    544 			icmpdst.sin_addr = ip->ip_src;
    545 		else
    546 			icmpdst.sin_addr = ip->ip_dst;
    547 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    548 		    m->m_pkthdr.rcvif));
    549 		if (ia == 0)
    550 			break;
    551 		icp->icmp_type = ICMP_MASKREPLY;
    552 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    553 		if (in_nullhost(ip->ip_src)) {
    554 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    555 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    556 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    557 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    558 		}
    559 reflect:	/* reached by fall-through from MASKREQ and by goto from ECHO/TSTAMP */
    560 		icmpstat.icps_reflect++;
    561 		icmpstat.icps_outhist[icp->icmp_type]++;
    562 		icmp_reflect(m);
    563 		return;	/* m was handed to icmp_reflect(); not passed to rip_input() */
    564 
    565 	case ICMP_REDIRECT:
    566 		if (code > 3)
    567 			goto badcode;
    568 		if (icmp_rediraccept == 0)
    569 			goto freeit;
    570 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    571 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    572 			icmpstat.icps_badlen++;
    573 			break;
    574 		}
    575 		/*
    576 		 * Short circuit routing redirects to force
    577 		 * immediate change in the kernel's routing
    578 		 * tables.  The message is also handed to anyone
    579 		 * listening on a raw socket (e.g. the routing
    580 		 * daemon for use in updating its tables).
    581 		 */
    582 		icmpgw.sin_addr = ip->ip_src;
    583 		icmpdst.sin_addr = icp->icmp_gwaddr;
    584 #ifdef	ICMPPRINTFS
    585 		if (icmpprintfs)
    586 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    587 			    icp->icmp_gwaddr);
    588 #endif
    589 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    590 		rt = NULL;
    591 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    592 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    593 		    sintosa(&icmpgw), (struct rtentry **)&rt);
    594 		if (rt != NULL && icmp_redirtimeout != 0) {
    595 			i = rt_timer_add(rt, icmp_redirect_timeout,
    596 					 icmp_redirect_timeout_q);
    597 			if (i)
    598 				log(LOG_ERR, "ICMP:  redirect failed to "
    599 				    "register timeout for route to %x, "
    600 				    "code %d\n",
    601 				    icp->icmp_ip.ip_dst.s_addr, i);
    602 		}
    603 		if (rt != NULL)
    604 			rtfree(rt);	/* drop the reference returned through &rt */
    605 
    606 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    607 #ifdef IPSEC
    608 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    609 #endif
    610 		break;
    611 
    612 	/*
    613 	 * No kernel processing for the following;
    614 	 * just fall through to send to raw listener.
    615 	 */
    616 	case ICMP_ECHOREPLY:
    617 	case ICMP_ROUTERADVERT:
    618 	case ICMP_ROUTERSOLICIT:
    619 	case ICMP_TSTAMPREPLY:
    620 	case ICMP_IREQREPLY:
    621 	case ICMP_MASKREPLY:
    622 	default:
    623 		break;
    624 	}
    625 
    626 raw:
    627 	rip_input(m, hlen, proto);
    628 	return;
    629 
    630 freeit:
    631 	m_freem(m);
    632 	return;
    633 }
    634 
    635 /*
    636  * Reflect the ip packet back to the source
    637  */
    638 void
    639 icmp_reflect(m)
    640 	struct mbuf *m;
    641 {
    642 	struct ip *ip = mtod(m, struct ip *);
    643 	struct in_ifaddr *ia;
    644 	struct ifaddr *ifa;
    645 	struct sockaddr_in *sin = 0;
    646 	struct in_addr t;
    647 	struct mbuf *opts = 0;
    648 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    649 
    650 	if (!in_canforward(ip->ip_src) &&
    651 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    652 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    653 		m_freem(m);	/* Bad return address */
    654 		goto done;	/* ip_output() will check for broadcast */
    655 	}
    656 	t = ip->ip_dst;
    657 	ip->ip_dst = ip->ip_src;
    658 	/*
    659 	 * If the incoming packet was addressed directly to us, use
    660 	 * dst as the src for the reply.  Otherwise (broadcast or
    661 	 * anonymous), use an address which corresponds to the
    662 	 * incoming interface, with a preference for the address which
    663 	 * corresponds to the route to the destination of the ICMP.
    664 	 */
    665 
    666 	/* Look for packet addressed to us */
    667 	INADDR_TO_IA(t, ia);
    668 
    669 	/* look for packet sent to broadcast address */
    670 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    671 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    672 			if (ifa->ifa_addr->sa_family != AF_INET)
    673 				continue;
    674 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    675 				ia = ifatoia(ifa);
    676 				break;
    677 			}
    678 		}
    679 	}
    680 
    681 	if (ia)
    682 		sin = &ia->ia_addr;
    683 
    684 	icmpdst.sin_addr = t;
    685 
    686 	/* if the packet is addressed somewhere else, compute the
    687 	   source address for packets routed back to the source, and
    688 	   use that, if it's an address on the interface which
    689 	   received the packet */
    690 	if (sin == (struct sockaddr_in *)0) {
    691 		struct sockaddr_in sin_dst;
    692 		struct route icmproute;
    693 		int errornum;
    694 
    695 		sin_dst.sin_family = AF_INET;
    696 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    697 		sin_dst.sin_addr = ip->ip_dst;
    698 		bzero(&icmproute, sizeof(icmproute));
    699 		errornum = 0;
    700 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    701 		/* errornum is never used */
    702 		if (icmproute.ro_rt)
    703 			RTFREE(icmproute.ro_rt);
    704 		/* check to make sure sin is a source address on rcvif */
    705 		if (sin) {
    706 			t = sin->sin_addr;
    707 			sin = (struct sockaddr_in *)0;
    708 			INADDR_TO_IA(t, ia);
    709 			while (ia) {
    710 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    711 					sin = &ia->ia_addr;
    712 					break;
    713 				}
    714 				NEXT_IA_WITH_SAME_ADDR(ia);
    715 			}
    716 		}
    717 	}
    718 
    719 	/* if it was not addressed to us, but the route doesn't go out
    720 	   the source interface, pick an address on the source
    721 	   interface.  This can happen when routing is asymmetric, or
    722 	   when the incoming packet was encapsulated */
    723 	if (sin == (struct sockaddr_in *)0) {
    724 		TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
    725 			if (ifa->ifa_addr->sa_family != AF_INET)
    726 				continue;
    727 			sin = &(ifatoia(ifa)->ia_addr);
    728 			break;
    729 		}
    730 	}
    731 
    732 	/*
    733 	 * The following happens if the packet was not addressed to us,
    734 	 * and was received on an interface with no IP address:
    735 	 * We find the first AF_INET address on the first non-loopback
    736 	 * interface.
    737 	 */
    738 	if (sin == (struct sockaddr_in *)0)
    739 		TAILQ_FOREACH(ia, &in_ifaddr, ia_list) {
    740 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    741 				continue;
    742 			sin = &ia->ia_addr;
    743 			break;
    744 		}
    745 
    746 	/*
    747 	 * If we still didn't find an address, punt.  We could have an
    748 	 * interface up (and receiving packets) with no address.
    749 	 */
    750 	if (sin == (struct sockaddr_in *)0) {
    751 		m_freem(m);
    752 		goto done;
    753 	}
    754 
    755 	ip->ip_src = sin->sin_addr;
    756 	ip->ip_ttl = MAXTTL;
    757 
    758 	if (optlen > 0) {
    759 		u_char *cp;
    760 		int opt, cnt;
    761 		u_int len;
    762 
    763 		/*
    764 		 * Retrieve any source routing from the incoming packet;
    765 		 * add on any record-route or timestamp options.
    766 		 */
    767 		cp = (u_char *) (ip + 1);
    768 		if ((opts = ip_srcroute()) == 0 &&
    769 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    770 			opts->m_len = sizeof(struct in_addr);
    771 			*mtod(opts, struct in_addr *) = zeroin_addr;
    772 		}
    773 		if (opts) {
    774 #ifdef ICMPPRINTFS
    775 		    if (icmpprintfs)
    776 			    printf("icmp_reflect optlen %d rt %d => ",
    777 				optlen, opts->m_len);
    778 #endif
    779 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    780 			    opt = cp[IPOPT_OPTVAL];
    781 			    if (opt == IPOPT_EOL)
    782 				    break;
    783 			    if (opt == IPOPT_NOP)
    784 				    len = 1;
    785 			    else {
    786 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    787 					    break;
    788 				    len = cp[IPOPT_OLEN];
    789 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    790 				        len > cnt)
    791 					    break;
    792 			    }
    793 			    /*
    794 			     * Should check for overflow, but it "can't happen"
    795 			     */
    796 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    797 				opt == IPOPT_SECURITY) {
    798 				    bcopy((caddr_t)cp,
    799 					mtod(opts, caddr_t) + opts->m_len, len);
    800 				    opts->m_len += len;
    801 			    }
    802 		    }
    803 		    /* Terminate & pad, if necessary */
    804 		    if ((cnt = opts->m_len % 4) != 0) {
    805 			    for (; cnt < 4; cnt++) {
    806 				    *(mtod(opts, caddr_t) + opts->m_len) =
    807 					IPOPT_EOL;
    808 				    opts->m_len++;
    809 			    }
    810 		    }
    811 #ifdef ICMPPRINTFS
    812 		    if (icmpprintfs)
    813 			    printf("%d\n", opts->m_len);
    814 #endif
    815 		}
    816 		/*
    817 		 * Now strip out original options by copying rest of first
    818 		 * mbuf's data back, and adjust the IP length.
    819 		 */
    820 		ip->ip_len -= optlen;
    821 		ip->ip_hl = sizeof(struct ip) >> 2;
    822 		m->m_len -= optlen;
    823 		if (m->m_flags & M_PKTHDR)
    824 			m->m_pkthdr.len -= optlen;
    825 		optlen += sizeof(struct ip);
    826 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    827 			 (unsigned)(m->m_len - sizeof(struct ip)));
    828 	}
    829 	m->m_flags &= ~(M_BCAST|M_MCAST);
    830 	icmp_send(m, opts);
    831 done:
    832 	if (opts)
    833 		(void)m_free(opts);
    834 }
    835 
    836 /*
    837  * Send an icmp packet back to the ip level,
    838  * after supplying a checksum.
    839  */
    840 void
    841 icmp_send(m, opts)
    842 	struct mbuf *m;
    843 	struct mbuf *opts;
    844 {
    845 	struct ip *ip = mtod(m, struct ip *);
    846 	int hlen;
    847 	struct icmp *icp;
    848 
    849 	hlen = ip->ip_hl << 2;
    850 	m->m_data += hlen;
    851 	m->m_len -= hlen;
    852 	icp = mtod(m, struct icmp *);
    853 	icp->icmp_cksum = 0;
    854 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    855 	m->m_data -= hlen;
    856 	m->m_len += hlen;
    857 #ifdef ICMPPRINTFS
    858 	if (icmpprintfs)
    859 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    860 #endif
    861 #ifdef IPSEC
    862 	/* Don't lookup socket */
    863 	(void)ipsec_setsocket(m, NULL);
    864 #endif
    865 	(void) ip_output(m, opts, NULL, 0, NULL);
    866 }
    867 
    868 n_time
    869 iptime()
    870 {
    871 	struct timeval atv;
    872 	u_long t;
    873 
    874 	microtime(&atv);
    875 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    876 	return (htonl(t));
    877 }
    878 
    879 int
    880 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    881 	int *name;
    882 	u_int namelen;
    883 	void *oldp;
    884 	size_t *oldlenp;
    885 	void *newp;
    886 	size_t newlen;
    887 {
    888 	int arg, error;
    889 
    890 	/* All sysctl names at this level are terminal. */
    891 	if (namelen != 1)
    892 		return (ENOTDIR);
    893 
    894 	switch (name[0])
    895 	{
    896 	case ICMPCTL_MASKREPL:
    897 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    898 		break;
    899 	case ICMPCTL_RETURNDATABYTES:
    900 		arg = icmpreturndatabytes;
    901 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    902 		if (error)
    903 			break;
    904 		if ((arg >= 8) || (arg <= 512))
    905 			icmpreturndatabytes = arg;
    906 		else
    907 			error = EINVAL;
    908 		break;
    909 	case ICMPCTL_ERRPPSLIMIT:
    910 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    911 		break;
    912 	case ICMPCTL_REDIRACCEPT:
    913 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    914 				   &icmp_rediraccept);
    915 		break;
    916 	case ICMPCTL_REDIRTIMEOUT:
    917 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    918 				   &icmp_redirtimeout);
    919 		if (icmp_redirect_timeout_q != NULL) {
    920 			if (icmp_redirtimeout == 0) {
    921 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
    922 						       TRUE);
    923 				icmp_redirect_timeout_q = NULL;
    924 			} else {
    925 				rt_timer_queue_change(icmp_redirect_timeout_q,
    926 						      icmp_redirtimeout);
    927 			}
    928 		} else if (icmp_redirtimeout > 0) {
    929 			icmp_redirect_timeout_q =
    930 				rt_timer_queue_create(icmp_redirtimeout);
    931 		}
    932 		return (error);
    933 
    934 		break;
    935 	default:
    936 		error = ENOPROTOOPT;
    937 		break;
    938 	}
    939 	return error;
    940 }
    941 
    942 /* Table of common MTUs: */
    943 
    944 static const u_int mtu_table[] = {
    945 	65535, 65280, 32000, 17914, 9180, 8166,
    946 	4352, 2002, 1492, 1006, 508, 296, 68, 0
    947 };
    948 
    949 void
    950 icmp_mtudisc(icp, faddr)
    951 	struct icmp *icp;
    952 	struct in_addr faddr;
    953 {
    954 	struct icmp_mtudisc_callback *mc;
    955 	struct sockaddr *dst = sintosa(&icmpsrc);
    956 	struct rtentry *rt;
    957 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    958 	int    error;
    959 
    960 	rt = rtalloc1(dst, 1);
    961 	if (rt == 0)
    962 		return;
    963 
    964 	/* If we didn't get a host route, allocate one */
    965 
    966 	if ((rt->rt_flags & RTF_HOST) == 0) {
    967 		struct rtentry *nrt;
    968 
    969 		error = rtrequest((int) RTM_ADD, dst,
    970 		    (struct sockaddr *) rt->rt_gateway,
    971 		    (struct sockaddr *) 0,
    972 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    973 		if (error) {
    974 			rtfree(rt);
    975 			return;
    976 		}
    977 		nrt->rt_rmx = rt->rt_rmx;
    978 		rtfree(rt);
    979 		rt = nrt;
    980 	}
    981 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    982 	if (error) {
    983 		rtfree(rt);
    984 		return;
    985 	}
    986 
    987 	if (mtu == 0) {
    988 		int i = 0;
    989 
    990 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    991 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    992 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    993 			mtu -= (icp->icmp_ip.ip_hl << 2);
    994 
    995 		/* If we still can't guess a value, try the route */
    996 
    997 		if (mtu == 0) {
    998 			mtu = rt->rt_rmx.rmx_mtu;
    999 
   1000 			/* If no route mtu, default to the interface mtu */
   1001 
   1002 			if (mtu == 0)
   1003 				mtu = rt->rt_ifp->if_mtu;
   1004 		}
   1005 
   1006 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
   1007 			if (mtu > mtu_table[i]) {
   1008 				mtu = mtu_table[i];
   1009 				break;
   1010 			}
   1011 	}
   1012 
   1013 	/*
   1014 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
   1015 	 *	  to turn off PMTU for a route, and the kernel can
   1016 	 *	  set it to indicate a serious problem with PMTU
   1017 	 *	  on a route.  We should be using a separate flag
   1018 	 *	  for the kernel to indicate this.
   1019 	 */
   1020 
   1021 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1022 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
   1023 			rt->rt_rmx.rmx_locks |= RTV_MTU;
   1024 		else if (rt->rt_rmx.rmx_mtu > mtu ||
   1025 			 rt->rt_rmx.rmx_mtu == 0) {
   1026 			icmpstat.icps_pmtuchg++;
   1027 			rt->rt_rmx.rmx_mtu = mtu;
   1028 		}
   1029 	}
   1030 
   1031 	if (rt)
   1032 		rtfree(rt);
   1033 
   1034 	/*
   1035 	 * Notify protocols that the MTU for this destination
   1036 	 * has changed.
   1037 	 */
   1038 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
   1039 	     mc = LIST_NEXT(mc, mc_list))
   1040 		(*mc->mc_func)(faddr);
   1041 }
   1042 
   1043 /*
   1044  * Return the next larger or smaller MTU plateau (table from RFC 1191)
   1045  * given current value MTU.  If DIR is less than zero, a larger plateau
   1046  * is returned; otherwise, a smaller value is returned.
   1047  */
   1048 int
   1049 ip_next_mtu(mtu, dir)	/* XXX */
   1050 	int mtu;
   1051 	int dir;
   1052 {
   1053 	int i;
   1054 
   1055 	for (i = 0; i < (sizeof mtu_table) / (sizeof mtu_table[0]); i++) {
   1056 		if (mtu >= mtu_table[i])
   1057 			break;
   1058 	}
   1059 
   1060 	if (dir < 0) {
   1061 		if (i == 0) {
   1062 			return 0;
   1063 		} else {
   1064 			return mtu_table[i - 1];
   1065 		}
   1066 	} else {
   1067 		if (mtu_table[i] == 0) {
   1068 			return 0;
   1069 		} else if (mtu > mtu_table[i]) {
   1070 			return mtu_table[i];
   1071 		} else {
   1072 			return mtu_table[i + 1];
   1073 		}
   1074 	}
   1075 }
   1076 
   1077 static void
   1078 icmp_mtudisc_timeout(rt, r)
   1079 	struct rtentry *rt;
   1080 	struct rttimer *r;
   1081 {
   1082 	if (rt == NULL)
   1083 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1084 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1085 	    (RTF_DYNAMIC | RTF_HOST)) {
   1086 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1087 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1088 	} else {
   1089 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1090 			rt->rt_rmx.rmx_mtu = 0;
   1091 		}
   1092 	}
   1093 }
   1094 
   1095 static void
   1096 icmp_redirect_timeout(rt, r)
   1097 	struct rtentry *rt;
   1098 	struct rttimer *r;
   1099 {
   1100 	if (rt == NULL)
   1101 		panic("icmp_redirect_timeout:  bad route to timeout");
   1102 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1103 	    (RTF_DYNAMIC | RTF_HOST)) {
   1104 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1105 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1106 	}
   1107 }
   1108 
   1109 /*
   1110  * Perform rate limit check.
   1111  * Returns 0 if it is okay to send the icmp packet.
   1112  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1113  * limitation.
   1114  *
   1115  * XXX per-destination/type check necessary?
   1116  */
   1117 static int
   1118 icmp_ratelimit(dst, type, code)
   1119 	const struct in_addr *dst;
   1120 	const int type;			/* not used at this moment */
   1121 	const int code;			/* not used at this moment */
   1122 {
   1123 
   1124 	/* PPS limit */
   1125 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1126 	    icmperrppslim)) {
   1127 		/* The packet is subject to rate limit */
   1128 		return 1;
   1129 	}
   1130 
   1131 	/*okay to send*/
   1132 	return 0;
   1133 }
   1134