Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.63
      1 /*	$NetBSD: ip_icmp.c,v 1.63 2001/10/30 06:41:10 kml Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include "opt_ipsec.h"
    108 
    109 #include <sys/param.h>
    110 #include <sys/systm.h>
    111 #include <sys/malloc.h>
    112 #include <sys/mbuf.h>
    113 #include <sys/protosw.h>
    114 #include <sys/socket.h>
    115 #include <sys/time.h>
    116 #include <sys/kernel.h>
    117 #include <sys/syslog.h>
    118 #include <sys/sysctl.h>
    119 
    120 #include <net/if.h>
    121 #include <net/route.h>
    122 
    123 #include <netinet/in.h>
    124 #include <netinet/in_systm.h>
    125 #include <netinet/in_var.h>
    126 #include <netinet/ip.h>
    127 #include <netinet/ip_icmp.h>
    128 #include <netinet/ip_var.h>
    129 #include <netinet/in_pcb.h>
    130 #include <netinet/icmp_var.h>
    131 
    132 #ifdef IPSEC
    133 #include <netinet6/ipsec.h>
    134 #include <netkey/key.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turnaround packets back to the originator, and
    142  * host table maintenance routines.
    143  */
    144 
    145 int	icmpmaskrepl = 0;	/* non-zero: icmp_input() answers ICMP_MASKREQ */
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;	/* non-zero: print debugging traces */
    148 #endif
        /*
         * Upper bound on the number of bytes following the offending packet's
         * IP header that icmp_error() quotes in an error message.
         */
    149 int	icmpreturndatabytes = 8;
    150 
    151 /*
    152  * List of callbacks to notify when Path MTU changes are made.
    153  */
    154 struct icmp_mtudisc_callback {
    155 	LIST_ENTRY(icmp_mtudisc_callback) mc_list;
    156 	void (*mc_func) __P((struct in_addr));
    157 };
    158 
        /* Head of the callback list; entries are added by
         * icmp_mtudisc_callback_register(). */
    159 LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    160     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
    161 
        /* ip_next_mtu is deliberately left non-static; the static prototype
         * is compiled out. */
    162 #if 0
    163 static int	ip_next_mtu __P((int, int));
    164 #else
    165 /*static*/ int	ip_next_mtu __P((int, int));
    166 #endif
    167 
        /* Rate limit for ICMP errors; defined elsewhere, exported through
         * ICMPCTL_ERRPPSLIMIT. */
    168 extern int icmperrppslim;
        /* NOTE(review): presumably the state used by icmp_ratelimit(), which
         * is not visible in this part of the file -- confirm. */
    169 static int icmperrpps_count = 0;
    170 static struct timeval icmperrppslim_last;
        /* Zero makes icmp_input() drop ICMP_REDIRECT messages. */
    171 static int icmp_rediraccept = 1;
        /* Non-zero: routes created by redirects get a timer on the queue
         * below. */
    172 static int icmp_redirtimeout = 0;
    173 static struct rttimer_queue *icmp_redirect_timeout_q = NULL;
    174 
    175 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    176 static void icmp_redirect_timeout __P((struct rtentry *, struct rttimer *));
    177 
    178 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    179 
    180 
    181 void
    182 icmp_init()
    183 {
    184 	/*
    185 	 * This is only useful if the user initializes redirtimeout to
    186 	 * something other than zero.
    187 	 */
    188 	if (icmp_redirtimeout != 0) {
    189 		icmp_redirect_timeout_q =
    190 			rt_timer_queue_create(icmp_redirtimeout);
    191 	}
    192 }
    193 
    194 /*
    195  * Register a Path MTU Discovery callback.
    196  */
    197 void
    198 icmp_mtudisc_callback_register(func)
    199 	void (*func) __P((struct in_addr));
    200 {
    201 	struct icmp_mtudisc_callback *mc;
    202 
    203 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    204 	     mc = LIST_NEXT(mc, mc_list)) {
    205 		if (mc->mc_func == func)
    206 			return;
    207 	}
    208 
    209 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    210 	if (mc == NULL)
    211 		panic("icmp_mtudisc_callback_register");
    212 
    213 	mc->mc_func = func;
    214 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    215 }
    216 
    217 /*
    218  * Generate an error packet of type error
    219  * in response to bad packet ip.
    220  */
    221 void
    222 icmp_error(n, type, code, dest, destifp)
    223 	struct mbuf *n;
    224 	int type, code;
    225 	n_long dest;
    226 	struct ifnet *destifp;
    227 {
    228 	struct ip *oip = mtod(n, struct ip *), *nip;
    229 	unsigned oiplen = oip->ip_hl << 2;
    230 	struct icmp *icp;
    231 	struct mbuf *m;
    232 	unsigned icmplen, mblen;
    233 
    234 #ifdef ICMPPRINTFS
    235 	if (icmpprintfs)
    236 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    237 #endif
    238 	if (type != ICMP_REDIRECT)
    239 		icmpstat.icps_error++;
    240 	/*
    241 	 * Don't send error if the original packet was encrypted.
    242 	 * Don't send error if not the first fragment of message.
    243 	 * Don't error if the old packet protocol was ICMP
    244 	 * error message, only known informational types.
    245 	 */
    246 	if (n->m_flags & M_DECRYPTED)
    247 		goto freeit;
    248 	if (oip->ip_off &~ (IP_MF|IP_DF))
    249 		goto freeit;
    250 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    251 	  n->m_len >= oiplen + ICMP_MINLEN &&
    252 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    253 		icmpstat.icps_oldicmp++;
    254 		goto freeit;
    255 	}
    256 	/* Don't send error in response to a multicast or broadcast packet */
    257 	if (n->m_flags & (M_BCAST|M_MCAST))
    258 		goto freeit;
    259 
    260 	/*
    261 	 * First, do a rate limitation check.
    262 	 */
    263 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    264 		/* XXX stat */
    265 		goto freeit;
    266 	}
    267 
    268 	/*
    269 	 * Now, formulate icmp message
    270 	 */
    271 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    272 	/*
    273 	 * Defend against mbuf chains shorter than oip->ip_len:
    274 	 */
    275 	mblen = 0;
    276 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    277 		mblen += m->m_len;
    278 	icmplen = min(mblen, icmplen);
    279 
    280 	/*
    281 	 * As we are not required to return everything we have,
    282 	 * we return whatever we can return at ease.
    283 	 *
    284 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    285 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    286 	 * icmp_sysctl will keep things below that limit.
    287 	 */
    288 
    289 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    290 
    291 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    292 		icmplen = MCLBYTES - ICMP_MINLEN;
    293 
    294 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    295 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    296 		MCLGET(m, M_DONTWAIT);
    297 		if ((m->m_flags & M_EXT) == 0) {
    298 			m_freem(m);
    299 			m = NULL;
    300 		}
    301 	}
    302 	if (m == NULL)
    303 		goto freeit;
    304 	m->m_len = icmplen + ICMP_MINLEN;
    305 	if ((m->m_flags & M_EXT) == 0)
    306 		MH_ALIGN(m, m->m_len);
    307 	icp = mtod(m, struct icmp *);
    308 	if ((u_int)type > ICMP_MAXTYPE)
    309 		panic("icmp_error");
    310 	icmpstat.icps_outhist[type]++;
    311 	icp->icmp_type = type;
    312 	if (type == ICMP_REDIRECT)
    313 		icp->icmp_gwaddr.s_addr = dest;
    314 	else {
    315 		icp->icmp_void = 0;
    316 		/*
    317 		 * The following assignments assume an overlay with the
    318 		 * zeroed icmp_void field.
    319 		 */
    320 		if (type == ICMP_PARAMPROB) {
    321 			icp->icmp_pptr = code;
    322 			code = 0;
    323 		} else if (type == ICMP_UNREACH &&
    324 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    325 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    326 	}
    327 
    328 	HTONS(oip->ip_off);
    329 	HTONS(oip->ip_len);
    330 	icp->icmp_code = code;
    331 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    332 	nip = &icp->icmp_ip;
    333 
    334 	/*
    335 	 * Now, copy old ip header (without options)
    336 	 * in front of icmp message.
    337 	 */
    338 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    339 		panic("icmp len");
    340 	m->m_data -= sizeof(struct ip);
    341 	m->m_len += sizeof(struct ip);
    342 	m->m_pkthdr.len = m->m_len;
    343 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    344 	nip = mtod(m, struct ip *);
    345 	/* ip_v set in ip_output */
    346 	nip->ip_hl = sizeof(struct ip) >> 2;
    347 	nip->ip_tos = 0;
    348 	nip->ip_len = m->m_len;
    349 	/* ip_id set in ip_output */
    350 	nip->ip_off = 0;
    351 	/* ip_ttl set in icmp_reflect */
    352 	nip->ip_p = IPPROTO_ICMP;
    353 	nip->ip_src = oip->ip_src;
    354 	nip->ip_dst = oip->ip_dst;
    355 	icmp_reflect(m);
    356 
    357 freeit:
    358 	m_freem(n);
    359 }
    360 
    361 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    362 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    363 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
        /*
         * The three addresses above are file-scope scratch variables:
         * icmp_input() and icmp_reflect() store into their sin_addr fields
         * before passing them on.
         * NOTE(review): icmpmask is not referenced in the visible part of this
         * file; presumably it is used elsewhere -- confirm.
         */
    364 struct sockaddr_in icmpmask = { 8, 0 };
    365 
    366 /*
    367  * Process a received ICMP message.
    368  */
        /*
         * m holds the packet starting at its IP header.  Two int arguments are
         * read from the variable argument list, in this order: the IP header
         * length (hlen) and the protocol number (proto).
         *
         * The packet ends up in one of three places:
         *  - freed here (too short, bad checksum, malformed error message,
         *    redirects while icmp_rediraccept is 0);
         *  - turned into a reply and given to icmp_reflect() (echo, timestamp
         *    and address mask requests);
         *  - given to rip_input() for raw listeners (everything else,
         *    including types above ICMP_MAXTYPE).
         */
    369 void
    370 #if __STDC__
    371 icmp_input(struct mbuf *m, ...)
    372 #else
    373 icmp_input(m, va_alist)
    374 	struct mbuf *m;
    375 	va_dcl
    376 #endif
    377 {
    378 	int proto;
    379 	struct icmp *icp;
    380 	struct ip *ip = mtod(m, struct ip *);
    381 	int icmplen;
    382 	int i;
    383 	struct in_ifaddr *ia;
    384 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    385 	int code;
    386 	int hlen;
    387 	va_list ap;
    388 	struct rtentry *rt;
    389 
    390 	va_start(ap, m);
    391 	hlen = va_arg(ap, int);
    392 	proto = va_arg(ap, int);
    393 	va_end(ap);
    394 
    395 	/*
    396 	 * Locate icmp structure in mbuf, and check
    397 	 * that not corrupted and of at least minimum length.
    398 	 */
    399 	icmplen = ip->ip_len - hlen;
    400 #ifdef ICMPPRINTFS
    401 	if (icmpprintfs)
    402 		printf("icmp_input from %x to %x, len %d\n",
    403 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    404 		    icmplen);
    405 #endif
    406 	if (icmplen < ICMP_MINLEN) {
    407 		icmpstat.icps_tooshort++;
    408 		goto freeit;
    409 	}
        	/*
        	 * Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP
        	 * contiguous.  On failure m is NULL, so there is nothing to free
        	 * here (m_pullup presumably released the chain -- confirm).
        	 */
    410 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    411 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    412 		icmpstat.icps_tooshort++;
    413 		return;
    414 	}
    415 	ip = mtod(m, struct ip *);
        	/* Step over the IP header so the checksum covers only ICMP. */
    416 	m->m_len -= hlen;
    417 	m->m_data += hlen;
    418 	icp = mtod(m, struct icmp *);
    419 	if (in_cksum(m, icmplen)) {
    420 		icmpstat.icps_checksum++;
    421 		goto freeit;
    422 	}
        	/* Checksum is fine; expose the IP header again. */
    423 	m->m_len += hlen;
    424 	m->m_data -= hlen;
    425 
    426 #ifdef ICMPPRINTFS
    427 	/*
    428 	 * Message type specific processing.
    429 	 */
    430 	if (icmpprintfs)
    431 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    432 		    icp->icmp_code);
    433 #endif
        	/* Unknown types are not counted in the histogram; raw only. */
    434 	if (icp->icmp_type > ICMP_MAXTYPE)
    435 		goto raw;
    436 	icmpstat.icps_inhist[icp->icmp_type]++;
    437 	code = icp->icmp_code;
    438 	switch (icp->icmp_type) {
    439 
        	/*
        	 * The four error types below translate the ICMP code into a PRC_*
        	 * value and then share the "deliver" label.
        	 */
    440 	case ICMP_UNREACH:
    441 		switch (code) {
    442 			case ICMP_UNREACH_NET:
    443 			case ICMP_UNREACH_HOST:
    444 			case ICMP_UNREACH_PROTOCOL:
    445 			case ICMP_UNREACH_PORT:
    446 			case ICMP_UNREACH_SRCFAIL:
    447 				code += PRC_UNREACH_NET;
    448 				break;
    449 
    450 			case ICMP_UNREACH_NEEDFRAG:
    451 				code = PRC_MSGSIZE;
    452 				break;
    453 
    454 			case ICMP_UNREACH_NET_UNKNOWN:
    455 			case ICMP_UNREACH_NET_PROHIB:
    456 			case ICMP_UNREACH_TOSNET:
    457 				code = PRC_UNREACH_NET;
    458 				break;
    459 
    460 			case ICMP_UNREACH_HOST_UNKNOWN:
    461 			case ICMP_UNREACH_ISOLATED:
    462 			case ICMP_UNREACH_HOST_PROHIB:
    463 			case ICMP_UNREACH_TOSHOST:
    464 				code = PRC_UNREACH_HOST;
    465 				break;
    466 
    467 			default:
    468 				goto badcode;
    469 		}
    470 		goto deliver;
    471 
    472 	case ICMP_TIMXCEED:
    473 		if (code > 1)
    474 			goto badcode;
    475 		code += PRC_TIMXCEED_INTRANS;
    476 		goto deliver;
    477 
    478 	case ICMP_PARAMPROB:
    479 		if (code > 1)
    480 			goto badcode;
    481 		code = PRC_PARAMPROB;
    482 		goto deliver;
    483 
    484 	case ICMP_SOURCEQUENCH:
    485 		if (code)
    486 			goto badcode;
    487 		code = PRC_QUENCH;
    488 		goto deliver;
    489 
    490 	deliver:
    491 		/*
    492 		 * Problem with datagram; advise higher level routines.
    493 		 */
        		/*
        		 * The quoted IP header must be complete and sane.  Unlike
        		 * the redirect case further down, a bad length here frees
        		 * the packet instead of passing it to raw listeners.
        		 */
    494 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    495 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    496 			icmpstat.icps_badlen++;
    497 			goto freeit;
    498 		}
    499 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    500 			goto badcode;
    501 		NTOHS(icp->icmp_ip.ip_len);
    502 #ifdef ICMPPRINTFS
    503 		if (icmpprintfs)
    504 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    505 #endif
        		/*
        		 * Notify the protocol named in the quoted header, passing
        		 * the quoted destination as the affected address.
        		 */
    506 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    507 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    508 		if (ctlfunc)
    509 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
    510 			    &icp->icmp_ip);
    511 		break;
    512 
    513 	badcode:
    514 		icmpstat.icps_badcode++;
    515 		break;
    516 
    517 	case ICMP_ECHO:
    518 		icp->icmp_type = ICMP_ECHOREPLY;
    519 		goto reflect;
    520 
    521 	case ICMP_TSTAMP:
    522 		if (icmplen < ICMP_TSLEN) {
    523 			icmpstat.icps_badlen++;
    524 			break;
    525 		}
    526 		icp->icmp_type = ICMP_TSTAMPREPLY;
    527 		icp->icmp_rtime = iptime();
    528 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    529 		goto reflect;
    530 
    531 	case ICMP_MASKREQ:
        		/* Mask replies are disabled unless icmpmaskrepl is set. */
    532 		if (icmpmaskrepl == 0)
    533 			break;
    534 		/*
    535 		 * We are not able to respond with all ones broadcast
    536 		 * unless we receive it over a point-to-point interface.
    537 		 */
    538 		if (icmplen < ICMP_MASKLEN) {
    539 			icmpstat.icps_badlen++;
    540 			break;
    541 		}
    542 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    543 		    in_nullhost(ip->ip_dst))
    544 			icmpdst.sin_addr = ip->ip_src;
    545 		else
    546 			icmpdst.sin_addr = ip->ip_dst;
    547 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    548 		    m->m_pkthdr.rcvif));
    549 		if (ia == 0)
    550 			break;
    551 		icp->icmp_type = ICMP_MASKREPLY;
    552 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
        		/*
        		 * A request from the null address gets the interface's
        		 * broadcast or point-to-point peer address as the source,
        		 * which icmp_reflect() then uses as the reply destination.
        		 */
    553 		if (in_nullhost(ip->ip_src)) {
    554 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    555 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    556 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    557 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    558 		}
        		/* Shared by echo, timestamp and mask requests; replies are
        		 * not handed to rip_input(). */
    559 reflect:
    560 		icmpstat.icps_reflect++;
    561 		icmpstat.icps_outhist[icp->icmp_type]++;
    562 		icmp_reflect(m);
    563 		return;
    564 
    565 	case ICMP_REDIRECT:
    566 		if (code > 3)
    567 			goto badcode;
    568 		if (icmp_rediraccept == 0)
    569 			goto freeit;
    570 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    571 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    572 			icmpstat.icps_badlen++;
    573 			break;
    574 		}
    575 		/*
    576 		 * Short circuit routing redirects to force
    577 		 * immediate change in the kernel's routing
    578 		 * tables.  The message is also handed to anyone
    579 		 * listening on a raw socket (e.g. the routing
    580 		 * daemon for use in updating its tables).
    581 		 */
    582 		icmpgw.sin_addr = ip->ip_src;
    583 		icmpdst.sin_addr = icp->icmp_gwaddr;
    584 #ifdef	ICMPPRINTFS
    585 		if (icmpprintfs)
    586 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    587 			    icp->icmp_gwaddr);
    588 #endif
    589 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    590 		rt = NULL;
    591 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    592 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    593 		    sintosa(&icmpgw), (struct rtentry **)&rt);
        		/*
        		 * NOTE(review): icmp_redirect_timeout_q is passed without a
        		 * NULL check; it is only created for a non-zero timeout in
        		 * icmp_init()/icmp_sysctl() -- confirm it cannot be NULL here.
        		 */
    594 		if (rt != NULL && icmp_redirtimeout != 0) {
    595 			i = rt_timer_add(rt, icmp_redirect_timeout,
    596 					 icmp_redirect_timeout_q);
    597 			if (i)
    598 				log(LOG_ERR, "ICMP:  redirect failed to "
    599 				    "register timeout for route to %x, "
    600 				    "code %d\n",
    601 				    icp->icmp_ip.ip_dst.s_addr, i);
    602 		}
        		/* Drop the reference handed back through rt, if any. */
    603 		if (rt != NULL)
    604 			rtfree(rt);
    605 
    606 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    607 #ifdef IPSEC
    608 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    609 #endif
    610 		break;
    611 
    612 	/*
    613 	 * No kernel processing for the following;
    614 	 * just fall through to send to raw listener.
    615 	 */
    616 	case ICMP_ECHOREPLY:
    617 	case ICMP_ROUTERADVERT:
    618 	case ICMP_ROUTERSOLICIT:
    619 	case ICMP_TSTAMPREPLY:
    620 	case ICMP_IREQREPLY:
    621 	case ICMP_MASKREPLY:
    622 	default:
    623 		break;
    624 	}
    625 
    626 raw:
    627 	rip_input(m, hlen, proto);
    628 	return;
    629 
    630 freeit:
    631 	m_freem(m);
    632 	return;
    633 }
    634 
    635 /*
    636  * Reflect the ip packet back to the source
    637  */
        /*
         * m holds an IP packet whose ICMP part has already been filled in.
         * The old source becomes the destination, a local address is chosen
         * as the new source, the TTL is set to MAXTTL, and any IP options are
         * removed from the packet (selected ones are carried over into a
         * separate options mbuf).  The result is passed to icmp_send().
         *
         * m is either freed here (unusable return address, or no source
         * address could be found) or given to icmp_send().  The options mbuf,
         * if one was built, is released with m_free() before returning.
         */
    638 void
    639 icmp_reflect(m)
    640 	struct mbuf *m;
    641 {
    642 	struct ip *ip = mtod(m, struct ip *);
    643 	struct in_ifaddr *ia;
    644 	struct ifaddr *ifa;
    645 	struct sockaddr_in *sin = 0;
    646 	struct in_addr t;
    647 	struct mbuf *opts = 0;
    648 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    649 
        	/*
        	 * Refuse to reply to a source that in_canforward() rejects,
        	 * except for addresses on the loopback network.
        	 */
    650 	if (!in_canforward(ip->ip_src) &&
    651 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    652 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    653 		m_freem(m);	/* Bad return address */
    654 		goto done;	/* ip_output() will check for broadcast */
    655 	}
        	/* t keeps the original destination for the lookups below. */
    656 	t = ip->ip_dst;
    657 	ip->ip_dst = ip->ip_src;
    658 	/*
    659 	 * If the incoming packet was addressed directly to us, use
    660 	 * dst as the src for the reply.  Otherwise (broadcast or
    661 	 * anonymous), use an address which corresponds to the
    662 	 * incoming interface, with a preference for the address which
    663 	 * corresponds to the route to the destination of the ICMP.
    664 	 */
    665 
    666 	/* Look for packet addressed to us */
    667 	INADDR_TO_IA(t, ia);
    668 
        	/*
        	 * NOTE(review): m->m_pkthdr.rcvif is dereferenced here and below
        	 * without a NULL check -- confirm every caller sets it.
        	 */
    669 	/* look for packet sent to broadcast address */
    670 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    671 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    672 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    673 			if (ifa->ifa_addr->sa_family != AF_INET)
    674 				continue;
    675 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    676 				ia = ifatoia(ifa);
    677 				break;
    678 			}
    679 		}
    680 	}
    681 
    682 	if (ia)
    683 		sin = &ia->ia_addr;
    684 
        	/* Stored in the file-scope scratch address; not read again in
        	 * this function. */
    685 	icmpdst.sin_addr = t;
    686 
    687 	/* if the packet is addressed somewhere else, compute the
    688 	   source address for packets routed back to the source, and
    689 	   use that, if it's an address on the interface which
    690 	   received the packet */
    691 	if (sin == (struct sockaddr_in *)0) {
    692 		struct sockaddr_in sin_dst;
    693 		struct route icmproute;
    694 		int errornum;
    695 
    696 		sin_dst.sin_family = AF_INET;
    697 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    698 		sin_dst.sin_addr = ip->ip_dst;
    699 		bzero(&icmproute, sizeof(icmproute));
    700 		errornum = 0;
    701 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    702 		/* errornum is never used */
        		/* Release the route that in_selectsrc() may have left in
        		 * the temporary struct route. */
    703 		if (icmproute.ro_rt)
    704 			RTFREE(icmproute.ro_rt);
    705 		/* check to make sure sin is a source address on rcvif */
    706 		if (sin) {
    707 			t = sin->sin_addr;
    708 			sin = (struct sockaddr_in *)0;
    709 			INADDR_TO_IA(t, ia);
    710 			while (ia) {
    711 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    712 					sin = &ia->ia_addr;
    713 					break;
    714 				}
    715 				NEXT_IA_WITH_SAME_ADDR(ia);
    716 			}
    717 		}
    718 	}
    719 
    720 	/* if it was not addressed to us, but the route doesn't go out
    721 	   the source interface, pick an address on the source
    722 	   interface.  This can happen when routing is asymmetric, or
    723 	   when the incoming packet was encapsulated */
    724 	if (sin == (struct sockaddr_in *)0) {
    725 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    726 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    727 			if (ifa->ifa_addr->sa_family != AF_INET)
    728 				continue;
    729 			sin = &(ifatoia(ifa)->ia_addr);
    730 			break;
    731 		}
    732 	}
    733 
    734 	/*
    735 	 * The following happens if the packet was not addressed to us,
    736 	 * and was received on an interface with no IP address:
    737 	 * We find the first AF_INET address on the first non-loopback
    738 	 * interface.
    739 	 */
    740 	if (sin == (struct sockaddr_in *)0)
    741 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    742 		    ia = ia->ia_list.tqe_next) {
    743 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    744 				continue;
    745 			sin = &ia->ia_addr;
    746 			break;
    747 		}
    748 
    749 	/*
    750 	 * If we still didn't find an address, punt.  We could have an
    751 	 * interface up (and receiving packets) with no address.
    752 	 */
    753 	if (sin == (struct sockaddr_in *)0) {
    754 		m_freem(m);
    755 		goto done;
    756 	}
    757 
    758 	ip->ip_src = sin->sin_addr;
    759 	ip->ip_ttl = MAXTTL;
    760 
    761 	if (optlen > 0) {
    762 		u_char *cp;
    763 		int opt, cnt;
    764 		u_int len;
    765 
    766 		/*
    767 		 * Retrieve any source routing from the incoming packet;
    768 		 * add on any record-route or timestamp options.
    769 		 */
    770 		cp = (u_char *) (ip + 1);
        		/*
        		 * Without a saved source route, start a fresh options mbuf
        		 * that holds just a zeroed struct in_addr.
        		 */
    771 		if ((opts = ip_srcroute()) == 0 &&
    772 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    773 			opts->m_len = sizeof(struct in_addr);
    774 			*mtod(opts, struct in_addr *) = zeroin_addr;
    775 		}
    776 		if (opts) {
    777 #ifdef ICMPPRINTFS
    778 		    if (icmpprintfs)
    779 			    printf("icmp_reflect optlen %d rt %d => ",
    780 				optlen, opts->m_len);
    781 #endif
        		    /*
        		     * Walk the received options; stop at end-of-list or at
        		     * the first option whose length field is inconsistent.
        		     */
    782 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    783 			    opt = cp[IPOPT_OPTVAL];
    784 			    if (opt == IPOPT_EOL)
    785 				    break;
    786 			    if (opt == IPOPT_NOP)
    787 				    len = 1;
    788 			    else {
    789 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    790 					    break;
    791 				    len = cp[IPOPT_OLEN];
    792 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    793 				        len > cnt)
    794 					    break;
    795 			    }
    796 			    /*
    797 			     * Should check for overflow, but it "can't happen"
    798 			     */
    799 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    800 				opt == IPOPT_SECURITY) {
    801 				    bcopy((caddr_t)cp,
    802 					mtod(opts, caddr_t) + opts->m_len, len);
    803 				    opts->m_len += len;
    804 			    }
    805 		    }
    806 		    /* Terminate & pad, if necessary */
    807 		    if ((cnt = opts->m_len % 4) != 0) {
    808 			    for (; cnt < 4; cnt++) {
    809 				    *(mtod(opts, caddr_t) + opts->m_len) =
    810 					IPOPT_EOL;
    811 				    opts->m_len++;
    812 			    }
    813 		    }
    814 #ifdef ICMPPRINTFS
    815 		    if (icmpprintfs)
    816 			    printf("%d\n", opts->m_len);
    817 #endif
    818 		}
    819 		/*
    820 		 * Now strip out original options by copying rest of first
    821 		 * mbuf's data back, and adjust the IP length.
    822 		 */
    823 		ip->ip_len -= optlen;
    824 		ip->ip_hl = sizeof(struct ip) >> 2;
    825 		m->m_len -= optlen;
    826 		if (m->m_flags & M_PKTHDR)
    827 			m->m_pkthdr.len -= optlen;
        		/* From here on optlen is the full original header length,
        		 * i.e. the offset of the data to move. */
    828 		optlen += sizeof(struct ip);
    829 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    830 			 (unsigned)(m->m_len - sizeof(struct ip)));
    831 	}
    832 	m->m_flags &= ~(M_BCAST|M_MCAST);
    833 	icmp_send(m, opts);
    834 done:
    835 	if (opts)
    836 		(void)m_free(opts);
    837 }
    838 
    839 /*
    840  * Send an icmp packet back to the ip level,
    841  * after supplying a checksum.
    842  */
    843 void
    844 icmp_send(m, opts)
    845 	struct mbuf *m;
    846 	struct mbuf *opts;
    847 {
    848 	struct ip *ip = mtod(m, struct ip *);
    849 	int hlen;
    850 	struct icmp *icp;
    851 
    852 	hlen = ip->ip_hl << 2;
    853 	m->m_data += hlen;
    854 	m->m_len -= hlen;
    855 	icp = mtod(m, struct icmp *);
    856 	icp->icmp_cksum = 0;
    857 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    858 	m->m_data -= hlen;
    859 	m->m_len += hlen;
    860 #ifdef ICMPPRINTFS
    861 	if (icmpprintfs)
    862 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    863 #endif
    864 #ifdef IPSEC
    865 	/* Don't lookup socket */
    866 	(void)ipsec_setsocket(m, NULL);
    867 #endif
    868 	(void) ip_output(m, opts, NULL, 0, NULL);
    869 }
    870 
    871 n_time
    872 iptime()
    873 {
    874 	struct timeval atv;
    875 	u_long t;
    876 
    877 	microtime(&atv);
    878 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    879 	return (htonl(t));
    880 }
    881 
    882 int
    883 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    884 	int *name;
    885 	u_int namelen;
    886 	void *oldp;
    887 	size_t *oldlenp;
    888 	void *newp;
    889 	size_t newlen;
    890 {
    891 	int arg, error;
    892 
    893 	/* All sysctl names at this level are terminal. */
    894 	if (namelen != 1)
    895 		return (ENOTDIR);
    896 
    897 	switch (name[0])
    898 	{
    899 	case ICMPCTL_MASKREPL:
    900 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    901 		break;
    902 	case ICMPCTL_RETURNDATABYTES:
    903 		arg = icmpreturndatabytes;
    904 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    905 		if (error)
    906 			break;
    907 		if ((arg >= 8) || (arg <= 512))
    908 			icmpreturndatabytes = arg;
    909 		else
    910 			error = EINVAL;
    911 		break;
    912 	case ICMPCTL_ERRPPSLIMIT:
    913 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    914 		break;
    915 	case ICMPCTL_REDIRACCEPT:
    916 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    917 				   &icmp_rediraccept);
    918 		break;
    919 	case ICMPCTL_REDIRTIMEOUT:
    920 		error = sysctl_int(oldp, oldlenp, newp, newlen,
    921 				   &icmp_redirtimeout);
    922 		if (icmp_redirect_timeout_q != NULL) {
    923 			if (icmp_redirtimeout == 0) {
    924 				rt_timer_queue_destroy(icmp_redirect_timeout_q,
    925 						       TRUE);
    926 				icmp_redirect_timeout_q = NULL;
    927 			} else {
    928 				rt_timer_queue_change(icmp_redirect_timeout_q,
    929 						      icmp_redirtimeout);
    930 			}
    931 		} else if (icmp_redirtimeout > 0) {
    932 			icmp_redirect_timeout_q =
    933 				rt_timer_queue_create(icmp_redirtimeout);
    934 		}
    935 		return (error);
    936 
    937 		break;
    938 	default:
    939 		error = ENOPROTOOPT;
    940 		break;
    941 	}
    942 	return error;
    943 }
    944 
    945 void
    946 icmp_mtudisc(icp, faddr)
    947 	struct icmp *icp;
    948 	struct in_addr faddr;
    949 {
    950 	struct icmp_mtudisc_callback *mc;
    951 	struct sockaddr *dst = sintosa(&icmpsrc);
    952 	struct rtentry *rt;
    953 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    954 	int    error;
    955 
    956 	/* Table of common MTUs: */
    957 
    958 	static const u_int mtu_table[] = { 65535, 65280, 32000, 17914, 9180,
    959 	    8166, 4352, 2002, 1492, 1006, 508, 296, 68, 0};
    960 
    961 	rt = rtalloc1(dst, 1);
    962 	if (rt == 0)
    963 		return;
    964 
    965 	/* If we didn't get a host route, allocate one */
    966 
    967 	if ((rt->rt_flags & RTF_HOST) == 0) {
    968 		struct rtentry *nrt;
    969 
    970 		error = rtrequest((int) RTM_ADD, dst,
    971 		    (struct sockaddr *) rt->rt_gateway,
    972 		    (struct sockaddr *) 0,
    973 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    974 		if (error) {
    975 			rtfree(rt);
    976 			return;
    977 		}
    978 		nrt->rt_rmx = rt->rt_rmx;
    979 		rtfree(rt);
    980 		rt = nrt;
    981 	}
    982 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    983 	if (error) {
    984 		rtfree(rt);
    985 		return;
    986 	}
    987 
    988 	if (mtu == 0) {
    989 		int i = 0;
    990 
    991 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    992 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    993 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    994 			mtu -= (icp->icmp_ip.ip_hl << 2);
    995 
    996 		/* If we still can't guess a value, try the route */
    997 
    998 		if (mtu == 0) {
    999 			mtu = rt->rt_rmx.rmx_mtu;
   1000 
   1001 			/* If no route mtu, default to the interface mtu */
   1002 
   1003 			if (mtu == 0)
   1004 				mtu = rt->rt_ifp->if_mtu;
   1005 		}
   1006 
   1007 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
   1008 			if (mtu > mtu_table[i]) {
   1009 				mtu = mtu_table[i];
   1010 				break;
   1011 			}
   1012 	}
   1013 
   1014 	/*
   1015 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
   1016 	 *	  to turn off PMTU for a route, and the kernel can
   1017 	 *	  set it to indicate a serious problem with PMTU
   1018 	 *	  on a route.  We should be using a separate flag
   1019 	 *	  for the kernel to indicate this.
   1020 	 */
   1021 
   1022 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1023 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
   1024 			rt->rt_rmx.rmx_locks |= RTV_MTU;
   1025 		else if (rt->rt_rmx.rmx_mtu > mtu ||
   1026 			 rt->rt_rmx.rmx_mtu == 0) {
   1027 			icmpstat.icps_pmtuchg++;
   1028 			rt->rt_rmx.rmx_mtu = mtu;
   1029 		}
   1030 	}
   1031 
   1032 	if (rt)
   1033 		rtfree(rt);
   1034 
   1035 	/*
   1036 	 * Notify protocols that the MTU for this destination
   1037 	 * has changed.
   1038 	 */
   1039 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
   1040 	     mc = LIST_NEXT(mc, mc_list))
   1041 		(*mc->mc_func)(faddr);
   1042 }
   1043 
/*
 * Step from MTU to a neighbouring entry of the MTU plateau table
 * (table from RFC 1191).  A negative DIR asks for the next larger
 * plateau, any other DIR for the next smaller one.  0 is returned when
 * the table has no plateau in the requested direction.
 *
 * MTU is compared against the table as an unsigned quantity.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const unsigned int plateaus[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const unsigned int cur = (unsigned int)mtu;
	int idx = 0;

	/*
	 * Locate the first plateau that does not exceed the current
	 * value.  The trailing 0 entry always ends the scan.
	 */
	while (cur < plateaus[idx])
		idx++;

	/* Larger plateau wanted: it is the entry just before the match. */
	if (dir < 0)
		return (idx == 0) ? 0 : (int)plateaus[idx - 1];

	/* Smaller plateau wanted. */
	if (plateaus[idx] == 0)
		return 0;
	if (cur > plateaus[idx])
		return plateaus[idx];	/* current value lies between plateaus */
	return plateaus[idx + 1];	/* current value is itself a plateau */
}
   1081 
   1082 static void
   1083 icmp_mtudisc_timeout(rt, r)
   1084 	struct rtentry *rt;
   1085 	struct rttimer *r;
   1086 {
   1087 	if (rt == NULL)
   1088 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1089 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1090 	    (RTF_DYNAMIC | RTF_HOST)) {
   1091 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1092 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1093 	} else {
   1094 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1095 			rt->rt_rmx.rmx_mtu = 0;
   1096 		}
   1097 	}
   1098 }
   1099 
   1100 static void
   1101 icmp_redirect_timeout(rt, r)
   1102 	struct rtentry *rt;
   1103 	struct rttimer *r;
   1104 {
   1105 	if (rt == NULL)
   1106 		panic("icmp_redirect_timeout:  bad route to timeout");
   1107 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1108 	    (RTF_DYNAMIC | RTF_HOST)) {
   1109 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1110 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1111 	}
   1112 }
   1113 
   1114 /*
   1115  * Perform rate limit check.
   1116  * Returns 0 if it is okay to send the icmp packet.
   1117  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1118  * limitation.
   1119  *
   1120  * XXX per-destination/type check necessary?
   1121  */
   1122 static int
   1123 icmp_ratelimit(dst, type, code)
   1124 	const struct in_addr *dst;
   1125 	const int type;			/* not used at this moment */
   1126 	const int code;			/* not used at this moment */
   1127 {
   1128 
   1129 	/* PPS limit */
   1130 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1131 	    icmperrppslim)) {
   1132 		/* The packet is subject to rate limit */
   1133 		return 1;
   1134 	}
   1135 
   1136 	/*okay to send*/
   1137 	return 0;
   1138 }
   1139