Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.47.2.2
      1 /*	$NetBSD: ip_icmp.c,v 1.47.2.2 2000/07/28 16:58:09 sommerfeld Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <vm/vm.h>
    117 #include <sys/sysctl.h>
    118 
    119 #include <net/if.h>
    120 #include <net/route.h>
    121 
    122 #include <netinet/in.h>
    123 #include <netinet/in_systm.h>
    124 #include <netinet/in_var.h>
    125 #include <netinet/ip.h>
    126 #include <netinet/ip_icmp.h>
    127 #include <netinet/ip_var.h>
    128 #include <netinet/in_pcb.h>
    129 #include <netinet/icmp_var.h>
    130 
    131 #ifdef IPSEC
    132 #include <netinet6/ipsec.h>
    133 #include <netkey/key.h>
    134 #include <netkey/key_debug.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turnaround packets back to the originator, and
    142  * host table maintenance routines.
    143  */
    144 
    145 int	icmpmaskrepl = 0;	/* when 0, icmp_input ignores ICMP_MASKREQ; set via ICMPCTL_MASKREPL */
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;	/* when non-zero, enables the debugging printf()s in this file */
    148 #endif
    149 int	icmpreturndatabytes = 8;	/* max bytes beyond the IP header quoted by icmp_error; set via ICMPCTL_RETURNDATABYTES */
    150 
    151 #if 0
    152 static int	ip_next_mtu __P((int, int));
    153 #else
    154 /*static*/ int	ip_next_mtu __P((int, int));
    155 #endif
    156 
    157 extern	struct timeval icmperrratelim;	/* defined elsewhere; icmp_sysctl reads/writes it as a microsecond count */
    158 
    159 static void icmp_mtudisc __P((struct icmp *));
    160 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    161 
    162 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));	/* non-zero return makes icmp_error drop the error */
    163 
    164 /*
    165  * Generate an error packet of type error
    166  * in response to bad packet ip.
    167  */
    168 void
    169 icmp_error(n, type, code, dest, destifp)
    170 	struct mbuf *n;
    171 	int type, code;
    172 	n_long dest;
    173 	struct ifnet *destifp;
    174 {
    175 	struct ip *oip = mtod(n, struct ip *), *nip;
    176 	unsigned oiplen = oip->ip_hl << 2;
    177 	struct icmp *icp;
    178 	struct mbuf *m;
    179 	unsigned icmplen, mblen;
    180 
    181 #ifdef ICMPPRINTFS
    182 	if (icmpprintfs)
    183 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    184 #endif
    185 	if (type != ICMP_REDIRECT)
    186 		icmpstat.icps_error++;
    187 	/*
    188 	 * Don't send error if the original packet was encrypted.
    189 	 * Don't send error if not the first fragment of message.
    190 	 * Don't error if the old packet protocol was ICMP
    191 	 * error message, only known informational types.
    192 	 */
    193 	if (n->m_flags & M_DECRYPTED)
    194 		goto freeit;
    195 	if (oip->ip_off &~ (IP_MF|IP_DF))
    196 		goto freeit;
    197 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    198 	  n->m_len >= oiplen + ICMP_MINLEN &&
    199 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    200 		icmpstat.icps_oldicmp++;
    201 		goto freeit;
    202 	}
    203 	/* Don't send error in response to a multicast or broadcast packet */
    204 	if (n->m_flags & (M_BCAST|M_MCAST))
    205 		goto freeit;
    206 
    207 	/*
    208 	 * First, do a rate limitation check.
    209 	 */
    210 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    211 		/* XXX stat */
    212 		goto freeit;
    213 	}
    214 
    215 	/*
    216 	 * Now, formulate icmp message
    217 	 */
    218 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    219 	/*
    220 	 * Defend against mbuf chains shorter than oip->ip_len:
    221 	 */
    222 	mblen = 0;
    223 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    224 		mblen += m->m_len;
    225 	icmplen = min(mblen, icmplen);
    226 
    227 	/*
    228 	 * As we are not required to return everything we have,
    229 	 * we return whatever we can return at ease.
    230 	 *
    231 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    232 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    233 	 * icmp_sysctl will keep things below that limit.
    234 	 */
    235 
    236 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    237 
    238 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    239 		icmplen = MCLBYTES - ICMP_MINLEN;
    240 
    241 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    242 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    243 		MCLGET(m, M_DONTWAIT);
    244 		if ((m->m_flags & M_EXT) == 0) {
    245 			m_freem(m);
    246 			m = NULL;
    247 		}
    248 	}
    249 	if (m == NULL)
    250 		goto freeit;
    251 	m->m_len = icmplen + ICMP_MINLEN;
    252 	if ((m->m_flags & M_EXT) == 0)
    253 		MH_ALIGN(m, m->m_len);
    254 	icp = mtod(m, struct icmp *);
    255 	if ((u_int)type > ICMP_MAXTYPE)
    256 		panic("icmp_error");
    257 	icmpstat.icps_outhist[type]++;
    258 	icp->icmp_type = type;
    259 	if (type == ICMP_REDIRECT)
    260 		icp->icmp_gwaddr.s_addr = dest;
    261 	else {
    262 		icp->icmp_void = 0;
    263 		/*
    264 		 * The following assignments assume an overlay with the
    265 		 * zeroed icmp_void field.
    266 		 */
    267 		if (type == ICMP_PARAMPROB) {
    268 			icp->icmp_pptr = code;
    269 			code = 0;
    270 		} else if (type == ICMP_UNREACH &&
    271 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    272 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    273 	}
    274 
    275 	HTONS(oip->ip_off);
    276 	HTONS(oip->ip_len);
    277 	icp->icmp_code = code;
    278 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    279 	nip = &icp->icmp_ip;
    280 
    281 	/*
    282 	 * Now, copy old ip header (without options)
    283 	 * in front of icmp message.
    284 	 */
    285 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    286 		panic("icmp len");
    287 	m->m_data -= sizeof(struct ip);
    288 	m->m_len += sizeof(struct ip);
    289 	m->m_pkthdr.len = m->m_len;
    290 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    291 	nip = mtod(m, struct ip *);
    292 	/* ip_v set in ip_output */
    293 	nip->ip_hl = sizeof(struct ip) >> 2;
    294 	nip->ip_tos = 0;
    295 	nip->ip_len = m->m_len;
    296 	/* ip_id set in ip_output */
    297 	nip->ip_off = 0;
    298 	/* ip_ttl set in icmp_reflect */
    299 	nip->ip_p = IPPROTO_ICMP;
    300 	nip->ip_src = oip->ip_src;
    301 	nip->ip_dst = oip->ip_dst;
    302 	icmp_reflect(m);
    303 
    304 freeit:
    305 	m_freem(n);
    306 }
    307 
    308 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: destination of the quoted packet (deliver, redirect, icmp_mtudisc) */
    309 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: mask-request lookup address, redirect gateway, original dst in icmp_reflect */
    310 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: source address of a received redirect */
    311 struct sockaddr_in icmpmask = { 8, 0 };	/* NOTE(review): not referenced in the visible part of this file */
    312 
    313 /*
    314  * Process a received ICMP message.
         *
         * m is the received packet, starting at its IP header.  Two int
         * arguments follow and are read in this order: the IP header length
         * (hlen) and the protocol number (proto); proto is only passed on to
         * rip_input().
         *
         * Unreachable, time-exceeded, parameter-problem and source-quench
         * messages are mapped to a PRC_* code and passed to the pr_ctlinput
         * handler of the protocol named in the quoted IP header.  Echo,
         * timestamp and (if icmpmaskrepl is set) mask requests are rewritten
         * in place and sent back through icmp_reflect().  Redirects are
         * applied with rtredirect().  Unless the packet was reflected or
         * dropped, it is finally handed to rip_input().
    315  */
    316 void
    317 #if __STDC__
    318 icmp_input(struct mbuf *m, ...)
    319 #else
    320 icmp_input(m, va_alist)
    321 	struct mbuf *m;
    322 	va_dcl
    323 #endif
    324 {
    325 	int proto;
    326 	struct icmp *icp;
    327 	struct ip *ip = mtod(m, struct ip *);
    328 	int icmplen;
    329 	int i;
    330 	struct in_ifaddr *ia;
    331 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    332 	int code;
    333 	int hlen;
    334 	va_list ap;
    335 
    336 	va_start(ap, m);
    337 	hlen = va_arg(ap, int);
    338 	proto = va_arg(ap, int);
    339 	va_end(ap);
    340 
    341 	/*
    342 	 * Locate icmp structure in mbuf, and check
    343 	 * that not corrupted and of at least minimum length.
    344 	 */
    345 	icmplen = ip->ip_len - hlen;
    346 #ifdef ICMPPRINTFS
    347 	if (icmpprintfs)
    348 		printf("icmp_input from %x to %x, len %d\n",
    349 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    350 		    icmplen);
    351 #endif
    352 	if (icmplen < ICMP_MINLEN) {
    353 		icmpstat.icps_tooshort++;
    354 		goto freeit;
    355 	}
        	/*
        	 * Make the IP header plus at most ICMP_ADVLENMIN bytes of ICMP
        	 * contiguous in the first mbuf.  If m_pullup() fails, m is 0 at
        	 * this point, so this path returns without freeing anything.
        	 */
    356 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    357 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    358 		icmpstat.icps_tooshort++;
    359 		return;
    360 	}
        	/* m may have changed above, so re-derive the header pointer. */
    361 	ip = mtod(m, struct ip *);
        	/*
        	 * Step over the IP header so that in_cksum() covers only the
        	 * ICMP message; the adjustment is undone right after the check.
        	 */
    362 	m->m_len -= hlen;
    363 	m->m_data += hlen;
    364 	icp = mtod(m, struct icmp *);
    365 	if (in_cksum(m, icmplen)) {
    366 		icmpstat.icps_checksum++;
    367 		goto freeit;
    368 	}
    369 	m->m_len += hlen;
    370 	m->m_data -= hlen;
    371 
    372 #ifdef ICMPPRINTFS
    373 	/*
    374 	 * Message type specific processing.
    375 	 */
    376 	if (icmpprintfs)
    377 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    378 		    icp->icmp_code);
    379 #endif
    380 #ifdef IPSEC
    381 	/* drop it if it does not match the policy */
    382 	if (ipsec4_in_reject(m, NULL)) {
    383 		ipsecstat.in_polvio++;
    384 		goto freeit;
    385 	}
    386 #endif
        	/*
        	 * Types above ICMP_MAXTYPE are neither counted nor interpreted;
        	 * they go straight to the raw-socket path.
        	 */
    387 	if (icp->icmp_type > ICMP_MAXTYPE)
    388 		goto raw;
    389 	icmpstat.icps_inhist[icp->icmp_type]++;
    390 	code = icp->icmp_code;
    391 	switch (icp->icmp_type) {
    392 
    393 	case ICMP_UNREACH:
    394 		switch (code) {
    395 			case ICMP_UNREACH_NET:
    396 			case ICMP_UNREACH_HOST:
    397 			case ICMP_UNREACH_PROTOCOL:
    398 			case ICMP_UNREACH_PORT:
    399 			case ICMP_UNREACH_SRCFAIL:
        				/*
        				 * NOTE(review): relies on these ICMP codes
        				 * and the PRC_UNREACH_* values being laid
        				 * out in the same order -- confirm against
        				 * the headers.
        				 */
    400 				code += PRC_UNREACH_NET;
    401 				break;
    402 
    403 			case ICMP_UNREACH_NEEDFRAG:
    404 				code = PRC_MSGSIZE;
    405 				break;
    406 
    407 			case ICMP_UNREACH_NET_UNKNOWN:
    408 			case ICMP_UNREACH_NET_PROHIB:
    409 			case ICMP_UNREACH_TOSNET:
    410 				code = PRC_UNREACH_NET;
    411 				break;
    412 
    413 			case ICMP_UNREACH_HOST_UNKNOWN:
    414 			case ICMP_UNREACH_ISOLATED:
    415 			case ICMP_UNREACH_HOST_PROHIB:
    416 			case ICMP_UNREACH_TOSHOST:
    417 				code = PRC_UNREACH_HOST;
    418 				break;
    419 
    420 			default:
    421 				goto badcode;
    422 		}
    423 		goto deliver;
    424 
    425 	case ICMP_TIMXCEED:
    426 		if (code > 1)
    427 			goto badcode;
    428 		code += PRC_TIMXCEED_INTRANS;
    429 		goto deliver;
    430 
    431 	case ICMP_PARAMPROB:
    432 		if (code > 1)
    433 			goto badcode;
    434 		code = PRC_PARAMPROB;
    435 		goto deliver;
    436 
    437 	case ICMP_SOURCEQUENCH:
    438 		if (code)
    439 			goto badcode;
    440 		code = PRC_QUENCH;
    441 		goto deliver;
    442 
        	/* Reached only by goto from the error-type cases above. */
    443 	deliver:
    444 		/*
    445 		 * Problem with datagram; advise higher level routines.
    446 		 */
    447 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    448 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    449 			icmpstat.icps_badlen++;
    450 			goto freeit;
    451 		}
    452 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    453 			goto badcode;
        		/* icmp_mtudisc() reads the quoted ip_len after this swap. */
    454 		NTOHS(icp->icmp_ip.ip_len);
    455 #ifdef ICMPPRINTFS
    456 		if (icmpprintfs)
    457 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    458 #endif
    459 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    460 		if (code == PRC_MSGSIZE && ip_mtudisc)
    461 			icmp_mtudisc(icp);
    462 		/*
    463 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    464 		 * notification to TCP layer.
    465 		 */
    466 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    467 		if (ctlfunc)
    468 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
    469 		break;
    470 
        	/* Bad code: count it, then still pass the packet to rip_input(). */
    471 	badcode:
    472 		icmpstat.icps_badcode++;
    473 		break;
    474 
    475 	case ICMP_ECHO:
    476 		icp->icmp_type = ICMP_ECHOREPLY;
    477 		goto reflect;
    478 
    479 	case ICMP_TSTAMP:
    480 		if (icmplen < ICMP_TSLEN) {
    481 			icmpstat.icps_badlen++;
    482 			break;
    483 		}
    484 		icp->icmp_type = ICMP_TSTAMPREPLY;
    485 		icp->icmp_rtime = iptime();
    486 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    487 		goto reflect;
    488 
    489 	case ICMP_MASKREQ:
    490 		if (icmpmaskrepl == 0)
    491 			break;
    492 		/*
    493 		 * We are not able to respond with all ones broadcast
    494 		 * unless we receive it over a point-to-point interface.
    495 		 */
    496 		if (icmplen < ICMP_MASKLEN) {
    497 			icmpstat.icps_badlen++;
    498 			break;
    499 		}
        		/*
        		 * Choose the address used to find the local interface
        		 * address whose netmask is reported.
        		 */
    500 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    501 		    in_nullhost(ip->ip_dst))
    502 			icmpdst.sin_addr = ip->ip_src;
    503 		else
    504 			icmpdst.sin_addr = ip->ip_dst;
    505 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    506 		    m->m_pkthdr.rcvif));
    507 		if (ia == 0)
    508 			break;
    509 		icp->icmp_type = ICMP_MASKREPLY;
    510 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
        		/*
        		 * A request from the null address gets its source replaced
        		 * by the interface's broadcast or point-to-point peer
        		 * address, which icmp_reflect() then uses as destination.
        		 */
    511 		if (in_nullhost(ip->ip_src)) {
    512 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    513 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    514 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    515 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    516 		}
        /* Common tail for replies: m is handed to icmp_reflect(), not rip_input(). */
    517 reflect:
    518 		icmpstat.icps_reflect++;
    519 		icmpstat.icps_outhist[icp->icmp_type]++;
    520 		icmp_reflect(m);
    521 		return;
    522 
    523 	case ICMP_REDIRECT:
    524 		if (code > 3)
    525 			goto badcode;
    526 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    527 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    528 			icmpstat.icps_badlen++;
    529 			break;
    530 		}
    531 		/*
    532 		 * Short circuit routing redirects to force
    533 		 * immediate change in the kernel's routing
    534 		 * tables.  The message is also handed to anyone
    535 		 * listening on a raw socket (e.g. the routing
    536 		 * daemon for use in updating its tables).
    537 		 */
    538 		icmpgw.sin_addr = ip->ip_src;
    539 		icmpdst.sin_addr = icp->icmp_gwaddr;
    540 #ifdef	ICMPPRINTFS
    541 		if (icmpprintfs)
    542 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    543 			    icp->icmp_gwaddr);
    544 #endif
    545 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    546 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    547 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    548 		    sintosa(&icmpgw), (struct rtentry **)0);
    549 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    550 #ifdef IPSEC
    551 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    552 #endif
    553 		break;
    554 
    555 	/*
    556 	 * No kernel processing for the following;
    557 	 * just fall through to send to raw listener.
    558 	 */
    559 	case ICMP_ECHOREPLY:
    560 	case ICMP_ROUTERADVERT:
    561 	case ICMP_ROUTERSOLICIT:
    562 	case ICMP_TSTAMPREPLY:
    563 	case ICMP_IREQREPLY:
    564 	case ICMP_MASKREPLY:
    565 	default:
    566 		break;
    567 	}
    568 
        /* Every "break" out of the switch ends up here; m goes to rip_input(). */
    569 raw:
    570 	rip_input(m, hlen, proto);
    571 	return;
    572 
        /* Drop path: the packet is freed and not passed to rip_input(). */
    573 freeit:
    574 	m_freem(m);
    575 	return;
    576 }
    577 
    578 /*
    579  * Reflect the ip packet back to the source
    580  */
    581 void
    582 icmp_reflect(m)
    583 	struct mbuf *m;
    584 {
    585 	struct ip *ip = mtod(m, struct ip *);
    586 	struct in_ifaddr *ia;
    587 	struct ifaddr *ifa;
    588 	struct sockaddr_in *sin = 0;
    589 	struct in_addr t;
    590 	struct mbuf *opts = 0;
    591 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    592 
    593 	if (!in_canforward(ip->ip_src) &&
    594 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    595 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    596 		m_freem(m);	/* Bad return address */
    597 		goto done;	/* ip_output() will check for broadcast */
    598 	}
    599 	t = ip->ip_dst;
    600 	ip->ip_dst = ip->ip_src;
    601 	/*
    602 	 * If the incoming packet was addressed directly to us, use
    603 	 * dst as the src for the reply.  Otherwise (broadcast or
    604 	 * anonymous), use an address which corresponds to the
    605 	 * incoming interface, with a preference for the address which
    606 	 * corresponds to the route to the destination of the ICMP.
    607 	 */
    608 
    609 	/* Look for packet addressed to us */
    610 	INADDR_TO_IA(t, ia);
    611 
    612 	/* look for packet sent to broadcast address */
    613 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    614 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    615 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    616 			if (ifa->ifa_addr->sa_family != AF_INET)
    617 				continue;
    618 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    619 				ia = ifatoia(ifa);
    620 				break;
    621 			}
    622 		}
    623 	}
    624 
    625 	if (ia)
    626 		sin = &ia->ia_addr;
    627 
    628 	icmpdst.sin_addr = t;
    629 
    630 	/* if the packet is addressed somewhere else, compute the
    631 	   source address for packets routed back to the source, and
    632 	   use that, if it's an address on the interface which
    633 	   received the packet */
    634 	if (sin == (struct sockaddr_in *)0) {
    635 		struct sockaddr_in sin_dst;
    636 		struct route icmproute;
    637 		int errornum;
    638 
    639 		sin_dst.sin_family = AF_INET;
    640 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    641 		sin_dst.sin_addr = ip->ip_dst;
    642 		bzero(&icmproute, sizeof(icmproute));
    643 		errornum = 0;
    644 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    645 		/* errornum is never used */
    646 		if (icmproute.ro_rt)
    647 			RTFREE(icmproute.ro_rt);
    648 		/* check to make sure sin is a source address on rcvif */
    649 		if (sin) {
    650 			t = sin->sin_addr;
    651 			sin = (struct sockaddr_in *)0;
    652 			INADDR_TO_IA(t, ia);
    653 			while (ia) {
    654 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    655 					sin = &ia->ia_addr;
    656 					break;
    657 				}
    658 				NEXT_IA_WITH_SAME_ADDR(ia);
    659 			}
    660 		}
    661 	}
    662 
    663 	/* if it was not addressed to us, but the route doesn't go out
    664 	   the source interface, pick an address on the source
    665 	   interface.  This can happen when routing is asymmetric, or
    666 	   when the incoming packet was encapsulated */
    667 	if (sin == (struct sockaddr_in *)0) {
    668 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    669 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    670 			if (ifa->ifa_addr->sa_family != AF_INET)
    671 				continue;
    672 			sin = &(ifatoia(ifa)->ia_addr);
    673 			break;
    674 		}
    675 	}
    676 
    677 	/*
    678 	 * The following happens if the packet was not addressed to us,
    679 	 * and was received on an interface with no IP address:
    680 	 * We find the first AF_INET address on the first non-loopback
    681 	 * interface.
    682 	 */
    683 	if (sin == (struct sockaddr_in *)0)
    684 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    685 		    ia = ia->ia_list.tqe_next) {
    686 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    687 				continue;
    688 			sin = &ia->ia_addr;
    689 			break;
    690 		}
    691 
    692 	/*
    693 	 * If we still didn't find an address, punt.  We could have an
    694 	 * interface up (and receiving packets) with no address.
    695 	 */
    696 	if (sin == (struct sockaddr_in *)0) {
    697 		m_freem(m);
    698 		goto done;
    699 	}
    700 
    701 	ip->ip_src = sin->sin_addr;
    702 	ip->ip_ttl = MAXTTL;
    703 
    704 	if (optlen > 0) {
    705 		u_char *cp;
    706 		int opt, cnt;
    707 		u_int len;
    708 
    709 		/*
    710 		 * Retrieve any source routing from the incoming packet;
    711 		 * add on any record-route or timestamp options.
    712 		 */
    713 		cp = (u_char *) (ip + 1);
    714 		if ((opts = ip_srcroute()) == 0 &&
    715 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    716 			opts->m_len = sizeof(struct in_addr);
    717 			*mtod(opts, struct in_addr *) = zeroin_addr;
    718 		}
    719 		if (opts) {
    720 #ifdef ICMPPRINTFS
    721 		    if (icmpprintfs)
    722 			    printf("icmp_reflect optlen %d rt %d => ",
    723 				optlen, opts->m_len);
    724 #endif
    725 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    726 			    opt = cp[IPOPT_OPTVAL];
    727 			    if (opt == IPOPT_EOL)
    728 				    break;
    729 			    if (opt == IPOPT_NOP)
    730 				    len = 1;
    731 			    else {
    732 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    733 					    break;
    734 				    len = cp[IPOPT_OLEN];
    735 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    736 				        len > cnt)
    737 					    break;
    738 			    }
    739 			    /*
    740 			     * Should check for overflow, but it "can't happen"
    741 			     */
    742 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    743 				opt == IPOPT_SECURITY) {
    744 				    bcopy((caddr_t)cp,
    745 					mtod(opts, caddr_t) + opts->m_len, len);
    746 				    opts->m_len += len;
    747 			    }
    748 		    }
    749 		    /* Terminate & pad, if necessary */
    750 		    if ((cnt = opts->m_len % 4) != 0) {
    751 			    for (; cnt < 4; cnt++) {
    752 				    *(mtod(opts, caddr_t) + opts->m_len) =
    753 					IPOPT_EOL;
    754 				    opts->m_len++;
    755 			    }
    756 		    }
    757 #ifdef ICMPPRINTFS
    758 		    if (icmpprintfs)
    759 			    printf("%d\n", opts->m_len);
    760 #endif
    761 		}
    762 		/*
    763 		 * Now strip out original options by copying rest of first
    764 		 * mbuf's data back, and adjust the IP length.
    765 		 */
    766 		ip->ip_len -= optlen;
    767 		ip->ip_hl = sizeof(struct ip) >> 2;
    768 		m->m_len -= optlen;
    769 		if (m->m_flags & M_PKTHDR)
    770 			m->m_pkthdr.len -= optlen;
    771 		optlen += sizeof(struct ip);
    772 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    773 			 (unsigned)(m->m_len - sizeof(struct ip)));
    774 	}
    775 	m->m_flags &= ~(M_BCAST|M_MCAST);
    776 	icmp_send(m, opts);
    777 done:
    778 	if (opts)
    779 		(void)m_free(opts);
    780 }
    781 
    782 /*
    783  * Send an icmp packet back to the ip level,
    784  * after supplying a checksum.
    785  */
    786 void
    787 icmp_send(m, opts)
    788 	struct mbuf *m;
    789 	struct mbuf *opts;
    790 {
    791 	struct ip *ip = mtod(m, struct ip *);
    792 	int hlen;
    793 	struct icmp *icp;
    794 
    795 	hlen = ip->ip_hl << 2;
    796 	m->m_data += hlen;
    797 	m->m_len -= hlen;
    798 	icp = mtod(m, struct icmp *);
    799 	icp->icmp_cksum = 0;
    800 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    801 	m->m_data -= hlen;
    802 	m->m_len += hlen;
    803 #ifdef ICMPPRINTFS
    804 	if (icmpprintfs)
    805 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    806 #endif
    807 #ifdef IPSEC
    808 	/* Don't lookup socket */
    809 	ipsec_setsocket(m, NULL);
    810 #endif
    811 	(void) ip_output(m, opts, NULL, 0, NULL);
    812 }
    813 
    814 n_time
    815 iptime()
    816 {
    817 	struct timeval atv;
    818 	u_long t;
    819 
    820 	microtime(&atv);
    821 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    822 	return (htonl(t));
    823 }
    824 
    825 int
    826 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    827 	int *name;
    828 	u_int namelen;
    829 	void *oldp;
    830 	size_t *oldlenp;
    831 	void *newp;
    832 	size_t newlen;
    833 {
    834 	int arg, error, s;
    835 
    836 	/* All sysctl names at this level are terminal. */
    837 	if (namelen != 1)
    838 		return (ENOTDIR);
    839 
    840 	switch (name[0])
    841 	{
    842 	case ICMPCTL_MASKREPL:
    843 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    844 		break;
    845 	case ICMPCTL_ERRRATELIMIT:
    846 		/*
    847 		 * The sysctl specifies the rate in usec-between-icmp,
    848 		 * so we must convert from/to a timeval.
    849 		 */
    850 		arg = (icmperrratelim.tv_sec * 1000000) +
    851 		    icmperrratelim.tv_usec;
    852 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    853 		if (error)
    854 			break;
    855 		if (arg >= 0) {
    856 			s = splsoftnet();
    857 			icmperrratelim.tv_sec = arg / 1000000;
    858 			icmperrratelim.tv_usec = arg % 1000000;
    859 			splx(s);
    860 		} else
    861 			error = EINVAL;
    862 		break;
    863 	case ICMPCTL_RETURNDATABYTES:
    864 		arg = icmpreturndatabytes;
    865 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    866 		if (error)
    867 			break;
    868 		if ((arg >= 8) || (arg <= 512))
    869 			icmpreturndatabytes = arg;
    870 		else
    871 			error = EINVAL;
    872 		break;
    873 	default:
    874 		error = ENOPROTOOPT;
    875 		break;
    876 	}
    877 	return error;
    878 }
    879 
    880 static void
    881 icmp_mtudisc(icp)
    882 	struct icmp *icp;
    883 {
    884 	struct rtentry *rt;
    885 	struct sockaddr *dst = sintosa(&icmpsrc);
    886 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    887 	int    error;
    888 
    889 	/* Table of common MTUs: */
    890 
    891 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    892 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    893 
    894 	rt = rtalloc1(dst, 1);
    895 	if (rt == 0)
    896 		return;
    897 
    898 	/* If we didn't get a host route, allocate one */
    899 
    900 	if ((rt->rt_flags & RTF_HOST) == 0) {
    901 		struct rtentry *nrt;
    902 
    903 		error = rtrequest((int) RTM_ADD, dst,
    904 		    (struct sockaddr *) rt->rt_gateway,
    905 		    (struct sockaddr *) 0,
    906 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    907 		if (error) {
    908 			rtfree(rt);
    909 			rtfree(nrt);
    910 			return;
    911 		}
    912 		nrt->rt_rmx = rt->rt_rmx;
    913 		rtfree(rt);
    914 		rt = nrt;
    915 	}
    916 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    917 	if (error) {
    918 		rtfree(rt);
    919 		return;
    920 	}
    921 
    922 	if (mtu == 0) {
    923 		int i = 0;
    924 
    925 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    926 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    927 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    928 			mtu -= (icp->icmp_ip.ip_hl << 2);
    929 
    930 		/* If we still can't guess a value, try the route */
    931 
    932 		if (mtu == 0) {
    933 			mtu = rt->rt_rmx.rmx_mtu;
    934 
    935 			/* If no route mtu, default to the interface mtu */
    936 
    937 			if (mtu == 0)
    938 				mtu = rt->rt_ifp->if_mtu;
    939 		}
    940 
    941 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    942 			if (mtu > mtu_table[i]) {
    943 				mtu = mtu_table[i];
    944 				break;
    945 			}
    946 	}
    947 
    948 	/*
    949 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    950 	 *	  to turn off PMTU for a route, and the kernel can
    951 	 *	  set it to indicate a serious problem with PMTU
    952 	 *	  on a route.  We should be using a separate flag
    953 	 *	  for the kernel to indicate this.
    954 	 */
    955 
    956 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    957 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    958 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    959 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    960 			 rt->rt_rmx.rmx_mtu == 0)
    961 			rt->rt_rmx.rmx_mtu = mtu;
    962 	}
    963 
    964 	if (rt)
    965 		rtfree(rt);
    966 }
    967 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, the next larger
 * plateau is returned; otherwise, the next smaller one is returned.
 *
 * Returns 0 when there is no plateau in the requested direction, or
 * when MTU is negative.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Descending plateaus; the trailing 0 is the end marker. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)(sizeof mtutab / sizeof mtutab[0]);
	int i;

	/*
	 * A negative MTU matches no entry, not even the 0 end marker, so
	 * the search below would finish one past the end of the table and
	 * the "smaller" branch would read out of bounds.  Treat it as
	 * "no plateau".
	 */
	if (mtu < 0)
		return 0;

	/* Locate the largest plateau that does not exceed mtu. */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Nothing lies above the first entry. */
		if (i == 0) {
			return 0;
		} else {
			return mtutab[i - 1];
		}
	} else {
		if (mtutab[i] == 0) {
			/* Already at the end marker: nothing smaller. */
			return 0;
		} else if (mtu > mtutab[i]) {
			/* mtu lies between two plateaus. */
			return mtutab[i];
		} else {
			/* mtu is exactly a plateau: step to the next one. */
			return mtutab[i + 1];
		}
	}
}
   1005 
   1006 static void
   1007 icmp_mtudisc_timeout(rt, r)
   1008 	struct rtentry *rt;
   1009 	struct rttimer *r;
   1010 {
   1011 	if (rt == NULL)
   1012 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1013 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1014 	    (RTF_DYNAMIC | RTF_HOST)) {
   1015 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1016 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1017 	} else {
   1018 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1019 			rt->rt_rmx.rmx_mtu = 0;
   1020 		}
   1021 	}
   1022 }
   1023 
   1024 /*
   1025  * Perform rate limit check.
   1026  * Returns 0 if it is okay to send the icmp packet.
   1027  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1028  * limitation.
   1029  *
   1030  * XXX per-destination/type check necessary?
   1031  */
   1032 static int
   1033 icmp_ratelimit(dst, type, code)
   1034 	const struct in_addr *dst;
   1035 	const int type;			/* not used at this moment */
   1036 	const int code;			/* not used at this moment */
   1037 {
   1038 	static struct timeval icmperrratelim_last;
   1039 	struct in_ifaddr *ia;
   1040 
   1041 	/*
   1042 	 * Don't rate-limit if it's for us!
   1043 	 */
   1044 	INADDR_TO_IA(*dst, ia);
   1045 	if (ia != NULL)
   1046 		return 0;
   1047 
   1048 	/*
   1049 	 * ratecheck() returns true if it is okay to send.  We return
   1050 	 * true if it is not okay to send.
   1051 	 */
   1052 	return (ratecheck(&icmperrratelim_last, &icmperrratelim) == 0);
   1053 }
   1054