Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.62
      1 /*	$NetBSD: ip_icmp.c,v 1.62 2001/10/29 07:02:33 simonb Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include "opt_ipsec.h"
    108 
    109 #include <sys/param.h>
    110 #include <sys/systm.h>
    111 #include <sys/malloc.h>
    112 #include <sys/mbuf.h>
    113 #include <sys/protosw.h>
    114 #include <sys/socket.h>
    115 #include <sys/time.h>
    116 #include <sys/kernel.h>
    117 #include <sys/sysctl.h>
    118 
    119 #include <net/if.h>
    120 #include <net/route.h>
    121 
    122 #include <netinet/in.h>
    123 #include <netinet/in_systm.h>
    124 #include <netinet/in_var.h>
    125 #include <netinet/ip.h>
    126 #include <netinet/ip_icmp.h>
    127 #include <netinet/ip_var.h>
    128 #include <netinet/in_pcb.h>
    129 #include <netinet/icmp_var.h>
    130 
    131 #ifdef IPSEC
    132 #include <netinet6/ipsec.h>
    133 #include <netkey/key.h>
    134 #endif
    135 
    136 #include <machine/stdarg.h>
    137 
    138 /*
    139  * ICMP routines: error generation, receive packet processing, and
    140  * routines to turnaround packets back to the originator, and
    141  * host table maintenance routines.
    142  */
    143 
/*
 * Tunables.
 *
 * icmpmaskrepl:	when zero, icmp_input() ignores ICMP_MASKREQ
 *			messages; adjustable through ICMPCTL_MASKREPL.
 * icmpprintfs:		when non-zero (and compiled with ICMPPRINTFS),
 *			enables the debugging printf()s in this file.
 * icmpreturndatabytes:	upper bound on the number of bytes following the
 *			offending packet's IP header that icmp_error()
 *			copies into an error message; adjustable through
 *			ICMPCTL_RETURNDATABYTES.
 */
int	icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
int	icmpprintfs = 0;
#endif
int	icmpreturndatabytes = 8;

/*
 * List of callbacks to notify when Path MTU changes are made.
 * Entries are added by icmp_mtudisc_callback_register(); each entry
 * holds one function that takes the affected address.
 */
struct icmp_mtudisc_callback {
	LIST_ENTRY(icmp_mtudisc_callback) mc_list;
	void (*mc_func) __P((struct in_addr));
};

LIST_HEAD(, icmp_mtudisc_callback) icmp_mtudisc_callbacks =
    LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);

/* ip_next_mtu() is deliberately left with external linkage for now. */
#if 0
static int	ip_next_mtu __P((int, int));
#else
/*static*/ int	ip_next_mtu __P((int, int));
#endif

/*
 * Error rate limiting state.  icmperrppslim is defined elsewhere and is
 * adjustable through ICMPCTL_ERRPPSLIMIT.
 * NOTE(review): the two statics are presumably consumed by
 * icmp_ratelimit(), which is outside the visible part of the file.
 */
extern int icmperrppslim;
static int icmperrpps_count = 0;
static struct timeval icmperrppslim_last;

/* Timer callback handed to rt_timer_add() by icmp_mtudisc(). */
static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));

/* Non-zero return means icmp_error() must drop the error silently. */
static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    174 
    175 /*
    176  * Register a Path MTU Discovery callback.
    177  */
    178 void
    179 icmp_mtudisc_callback_register(func)
    180 	void (*func) __P((struct in_addr));
    181 {
    182 	struct icmp_mtudisc_callback *mc;
    183 
    184 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    185 	     mc = LIST_NEXT(mc, mc_list)) {
    186 		if (mc->mc_func == func)
    187 			return;
    188 	}
    189 
    190 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    191 	if (mc == NULL)
    192 		panic("icmp_mtudisc_callback_register");
    193 
    194 	mc->mc_func = func;
    195 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    196 }
    197 
    198 /*
    199  * Generate an error packet of type error
    200  * in response to bad packet ip.
    201  */
    202 void
    203 icmp_error(n, type, code, dest, destifp)
    204 	struct mbuf *n;
    205 	int type, code;
    206 	n_long dest;
    207 	struct ifnet *destifp;
    208 {
    209 	struct ip *oip = mtod(n, struct ip *), *nip;
    210 	unsigned oiplen = oip->ip_hl << 2;
    211 	struct icmp *icp;
    212 	struct mbuf *m;
    213 	unsigned icmplen, mblen;
    214 
    215 #ifdef ICMPPRINTFS
    216 	if (icmpprintfs)
    217 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    218 #endif
    219 	if (type != ICMP_REDIRECT)
    220 		icmpstat.icps_error++;
    221 	/*
    222 	 * Don't send error if the original packet was encrypted.
    223 	 * Don't send error if not the first fragment of message.
    224 	 * Don't error if the old packet protocol was ICMP
    225 	 * error message, only known informational types.
    226 	 */
    227 	if (n->m_flags & M_DECRYPTED)
    228 		goto freeit;
    229 	if (oip->ip_off &~ (IP_MF|IP_DF))
    230 		goto freeit;
    231 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    232 	  n->m_len >= oiplen + ICMP_MINLEN &&
    233 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    234 		icmpstat.icps_oldicmp++;
    235 		goto freeit;
    236 	}
    237 	/* Don't send error in response to a multicast or broadcast packet */
    238 	if (n->m_flags & (M_BCAST|M_MCAST))
    239 		goto freeit;
    240 
    241 	/*
    242 	 * First, do a rate limitation check.
    243 	 */
    244 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    245 		/* XXX stat */
    246 		goto freeit;
    247 	}
    248 
    249 	/*
    250 	 * Now, formulate icmp message
    251 	 */
    252 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    253 	/*
    254 	 * Defend against mbuf chains shorter than oip->ip_len:
    255 	 */
    256 	mblen = 0;
    257 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    258 		mblen += m->m_len;
    259 	icmplen = min(mblen, icmplen);
    260 
    261 	/*
    262 	 * As we are not required to return everything we have,
    263 	 * we return whatever we can return at ease.
    264 	 *
    265 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    266 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    267 	 * icmp_sysctl will keep things below that limit.
    268 	 */
    269 
    270 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    271 
    272 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    273 		icmplen = MCLBYTES - ICMP_MINLEN;
    274 
    275 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    276 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    277 		MCLGET(m, M_DONTWAIT);
    278 		if ((m->m_flags & M_EXT) == 0) {
    279 			m_freem(m);
    280 			m = NULL;
    281 		}
    282 	}
    283 	if (m == NULL)
    284 		goto freeit;
    285 	m->m_len = icmplen + ICMP_MINLEN;
    286 	if ((m->m_flags & M_EXT) == 0)
    287 		MH_ALIGN(m, m->m_len);
    288 	icp = mtod(m, struct icmp *);
    289 	if ((u_int)type > ICMP_MAXTYPE)
    290 		panic("icmp_error");
    291 	icmpstat.icps_outhist[type]++;
    292 	icp->icmp_type = type;
    293 	if (type == ICMP_REDIRECT)
    294 		icp->icmp_gwaddr.s_addr = dest;
    295 	else {
    296 		icp->icmp_void = 0;
    297 		/*
    298 		 * The following assignments assume an overlay with the
    299 		 * zeroed icmp_void field.
    300 		 */
    301 		if (type == ICMP_PARAMPROB) {
    302 			icp->icmp_pptr = code;
    303 			code = 0;
    304 		} else if (type == ICMP_UNREACH &&
    305 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    306 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    307 	}
    308 
    309 	HTONS(oip->ip_off);
    310 	HTONS(oip->ip_len);
    311 	icp->icmp_code = code;
    312 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    313 	nip = &icp->icmp_ip;
    314 
    315 	/*
    316 	 * Now, copy old ip header (without options)
    317 	 * in front of icmp message.
    318 	 */
    319 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    320 		panic("icmp len");
    321 	m->m_data -= sizeof(struct ip);
    322 	m->m_len += sizeof(struct ip);
    323 	m->m_pkthdr.len = m->m_len;
    324 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    325 	nip = mtod(m, struct ip *);
    326 	/* ip_v set in ip_output */
    327 	nip->ip_hl = sizeof(struct ip) >> 2;
    328 	nip->ip_tos = 0;
    329 	nip->ip_len = m->m_len;
    330 	/* ip_id set in ip_output */
    331 	nip->ip_off = 0;
    332 	/* ip_ttl set in icmp_reflect */
    333 	nip->ip_p = IPPROTO_ICMP;
    334 	nip->ip_src = oip->ip_src;
    335 	nip->ip_dst = oip->ip_dst;
    336 	icmp_reflect(m);
    337 
    338 freeit:
    339 	m_freem(n);
    340 }
    341 
/*
 * Scratch socket addresses with length and family pre-set to AF_INET.
 * The code visible in this file only ever stores into their sin_addr
 * member before passing them on (icmpsrc and icmpgw in icmp_input(),
 * icmpdst in icmp_input() and icmp_reflect()).
 */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/* NOTE(review): not referenced in the part of the file visible here. */
struct sockaddr_in icmpmask = { 8, 0 };
    346 
/*
 * Process a received ICMP message.
 *
 * m holds the datagram starting at its IP header.  Two further int
 * arguments are read from the variable argument list: the IP header
 * length in bytes (hlen) and the protocol number (proto), the latter
 * only being passed through to rip_input().
 *
 * Depending on the message type the packet is
 *  - turned into a reply and sent back through icmp_reflect()
 *    (echo, timestamp and, if enabled, address mask requests),
 *  - reported to the protocol named in the quoted IP header through
 *    its pr_ctlinput hook (unreachable, time exceeded, parameter
 *    problem, source quench),
 *  - applied to the routing table (redirect),
 * and, unless it was reflected or dropped, finally handed to
 * rip_input().
 */
void
#if __STDC__
icmp_input(struct mbuf *m, ...)
#else
icmp_input(m, va_alist)
	struct mbuf *m;
	va_dcl
#endif
{
	int proto;
	struct icmp *icp;
	struct ip *ip = mtod(m, struct ip *);
	int icmplen;
	int i;
	struct in_ifaddr *ia;
	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
	int code;
	int hlen;
	va_list ap;

	va_start(ap, m);
	hlen = va_arg(ap, int);
	proto = va_arg(ap, int);
	va_end(ap);

	/*
	 * Locate icmp structure in mbuf, and check
	 * that not corrupted and of at least minimum length.
	 */
	icmplen = ip->ip_len - hlen;
#ifdef ICMPPRINTFS
	if (icmpprintfs)
		printf("icmp_input from %x to %x, len %d\n",
		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
		    icmplen);
#endif
	if (icmplen < ICMP_MINLEN) {
		icmpstat.icps_tooshort++;
		goto freeit;
	}
	/*
	 * Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP
	 * contiguous.  On failure the chain pointer is already null, so
	 * there is nothing left to free here.
	 * NOTE(review): presumably m_pullup() frees the chain when it
	 * fails -- confirm against mbuf(9).
	 */
	i = hlen + min(icmplen, ICMP_ADVLENMIN);
	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
		icmpstat.icps_tooshort++;
		return;
	}
	ip = mtod(m, struct ip *);
	/* Step over the IP header while verifying the ICMP checksum. */
	m->m_len -= hlen;
	m->m_data += hlen;
	icp = mtod(m, struct icmp *);
	if (in_cksum(m, icmplen)) {
		icmpstat.icps_checksum++;
		goto freeit;
	}
	/* Put the IP header back in view. */
	m->m_len += hlen;
	m->m_data -= hlen;

#ifdef ICMPPRINTFS
	/*
	 * Message type specific processing.
	 */
	if (icmpprintfs)
		printf("icmp_input, type %d code %d\n", icp->icmp_type,
		    icp->icmp_code);
#endif
	/* Unknown types are not counted; they go straight to raw sockets. */
	if (icp->icmp_type > ICMP_MAXTYPE)
		goto raw;
	icmpstat.icps_inhist[icp->icmp_type]++;
	code = icp->icmp_code;
	switch (icp->icmp_type) {

	/*
	 * Error messages: translate the ICMP code into the matching
	 * PRC_* value, then continue at "deliver".
	 */
	case ICMP_UNREACH:
		switch (code) {
			case ICMP_UNREACH_NET:
			case ICMP_UNREACH_HOST:
			case ICMP_UNREACH_PROTOCOL:
			case ICMP_UNREACH_PORT:
			case ICMP_UNREACH_SRCFAIL:
				code += PRC_UNREACH_NET;
				break;

			case ICMP_UNREACH_NEEDFRAG:
				code = PRC_MSGSIZE;
				break;

			case ICMP_UNREACH_NET_UNKNOWN:
			case ICMP_UNREACH_NET_PROHIB:
			case ICMP_UNREACH_TOSNET:
				code = PRC_UNREACH_NET;
				break;

			case ICMP_UNREACH_HOST_UNKNOWN:
			case ICMP_UNREACH_ISOLATED:
			case ICMP_UNREACH_HOST_PROHIB:
			case ICMP_UNREACH_TOSHOST:
				code = PRC_UNREACH_HOST;
				break;

			default:
				goto badcode;
		}
		goto deliver;

	case ICMP_TIMXCEED:
		if (code > 1)
			goto badcode;
		code += PRC_TIMXCEED_INTRANS;
		goto deliver;

	case ICMP_PARAMPROB:
		if (code > 1)
			goto badcode;
		code = PRC_PARAMPROB;
		goto deliver;

	case ICMP_SOURCEQUENCH:
		if (code)
			goto badcode;
		code = PRC_QUENCH;
		goto deliver;

	deliver:
		/*
		 * Problem with datagram; advise higher level routines.
		 * The message must be long enough to contain the quoted
		 * IP header, and that header must claim a sane length.
		 */
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat.icps_badlen++;
			goto freeit;
		}
		/* Errors quoting a multicast destination are not delivered. */
		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
			goto badcode;
		NTOHS(icp->icmp_ip.ip_len);
#ifdef ICMPPRINTFS
		if (icmpprintfs)
			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
#endif
		/* Notify the protocol named in the quoted header, if it cares. */
		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
		if (ctlfunc)
			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
			    &icp->icmp_ip);
		break;

	badcode:
		icmpstat.icps_badcode++;
		break;

	/*
	 * Requests answered by the kernel: rewrite the message in place
	 * into the reply and continue at "reflect".
	 */
	case ICMP_ECHO:
		icp->icmp_type = ICMP_ECHOREPLY;
		goto reflect;

	case ICMP_TSTAMP:
		if (icmplen < ICMP_TSLEN) {
			icmpstat.icps_badlen++;
			break;
		}
		icp->icmp_type = ICMP_TSTAMPREPLY;
		icp->icmp_rtime = iptime();
		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
		goto reflect;

	case ICMP_MASKREQ:
		/* Mask replies are only generated when enabled. */
		if (icmpmaskrepl == 0)
			break;
		/*
		 * We are not able to respond with all ones broadcast
		 * unless we receive it over a point-to-point interface.
		 */
		if (icmplen < ICMP_MASKLEN) {
			icmpstat.icps_badlen++;
			break;
		}
		/*
		 * Pick the address used to find the matching interface
		 * address on the receiving interface.
		 */
		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
		    in_nullhost(ip->ip_dst))
			icmpdst.sin_addr = ip->ip_src;
		else
			icmpdst.sin_addr = ip->ip_dst;
		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
		    m->m_pkthdr.rcvif));
		if (ia == 0)
			break;
		icp->icmp_type = ICMP_MASKREPLY;
		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
		/*
		 * A request from the null address cannot be answered
		 * directly; substitute the interface's broadcast or
		 * point-to-point destination address as the source, which
		 * icmp_reflect() then uses as the reply's destination.
		 */
		if (in_nullhost(ip->ip_src)) {
			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
				ip->ip_src = ia->ia_broadaddr.sin_addr;
			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
				ip->ip_src = ia->ia_dstaddr.sin_addr;
		}
reflect:
		/* The mbuf is handed over to icmp_reflect(); do not touch it again. */
		icmpstat.icps_reflect++;
		icmpstat.icps_outhist[icp->icmp_type]++;
		icmp_reflect(m);
		return;

	case ICMP_REDIRECT:
		if (code > 3)
			goto badcode;
		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
			icmpstat.icps_badlen++;
			break;
		}
		/*
		 * Short circuit routing redirects to force
		 * immediate change in the kernel's routing
		 * tables.  The message is also handed to anyone
		 * listening on a raw socket (e.g. the routing
		 * daemon for use in updating its tables).
		 */
		icmpgw.sin_addr = ip->ip_src;
		icmpdst.sin_addr = icp->icmp_gwaddr;
#ifdef	ICMPPRINTFS
		if (icmpprintfs)
			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
			    icp->icmp_gwaddr);
#endif
		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
		    sintosa(&icmpgw), (struct rtentry **)0);
		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
#ifdef IPSEC
		key_sa_routechange((struct sockaddr *)&icmpsrc);
#endif
		break;

	/*
	 * No kernel processing for the following;
	 * just fall through to send to raw listener.
	 */
	case ICMP_ECHOREPLY:
	case ICMP_ROUTERADVERT:
	case ICMP_ROUTERSOLICIT:
	case ICMP_TSTAMPREPLY:
	case ICMP_IREQREPLY:
	case ICMP_MASKREPLY:
	default:
		break;
	}

raw:
	/* Everything that was not reflected or dropped goes to rip_input(). */
	rip_input(m, hlen, proto);
	return;

freeit:
	m_freem(m);
	return;
}
    599 
    600 /*
    601  * Reflect the ip packet back to the source
    602  */
    603 void
    604 icmp_reflect(m)
    605 	struct mbuf *m;
    606 {
    607 	struct ip *ip = mtod(m, struct ip *);
    608 	struct in_ifaddr *ia;
    609 	struct ifaddr *ifa;
    610 	struct sockaddr_in *sin = 0;
    611 	struct in_addr t;
    612 	struct mbuf *opts = 0;
    613 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    614 
    615 	if (!in_canforward(ip->ip_src) &&
    616 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    617 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    618 		m_freem(m);	/* Bad return address */
    619 		goto done;	/* ip_output() will check for broadcast */
    620 	}
    621 	t = ip->ip_dst;
    622 	ip->ip_dst = ip->ip_src;
    623 	/*
    624 	 * If the incoming packet was addressed directly to us, use
    625 	 * dst as the src for the reply.  Otherwise (broadcast or
    626 	 * anonymous), use an address which corresponds to the
    627 	 * incoming interface, with a preference for the address which
    628 	 * corresponds to the route to the destination of the ICMP.
    629 	 */
    630 
    631 	/* Look for packet addressed to us */
    632 	INADDR_TO_IA(t, ia);
    633 
    634 	/* look for packet sent to broadcast address */
    635 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    636 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    637 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    638 			if (ifa->ifa_addr->sa_family != AF_INET)
    639 				continue;
    640 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    641 				ia = ifatoia(ifa);
    642 				break;
    643 			}
    644 		}
    645 	}
    646 
    647 	if (ia)
    648 		sin = &ia->ia_addr;
    649 
    650 	icmpdst.sin_addr = t;
    651 
    652 	/* if the packet is addressed somewhere else, compute the
    653 	   source address for packets routed back to the source, and
    654 	   use that, if it's an address on the interface which
    655 	   received the packet */
    656 	if (sin == (struct sockaddr_in *)0) {
    657 		struct sockaddr_in sin_dst;
    658 		struct route icmproute;
    659 		int errornum;
    660 
    661 		sin_dst.sin_family = AF_INET;
    662 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    663 		sin_dst.sin_addr = ip->ip_dst;
    664 		bzero(&icmproute, sizeof(icmproute));
    665 		errornum = 0;
    666 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    667 		/* errornum is never used */
    668 		if (icmproute.ro_rt)
    669 			RTFREE(icmproute.ro_rt);
    670 		/* check to make sure sin is a source address on rcvif */
    671 		if (sin) {
    672 			t = sin->sin_addr;
    673 			sin = (struct sockaddr_in *)0;
    674 			INADDR_TO_IA(t, ia);
    675 			while (ia) {
    676 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    677 					sin = &ia->ia_addr;
    678 					break;
    679 				}
    680 				NEXT_IA_WITH_SAME_ADDR(ia);
    681 			}
    682 		}
    683 	}
    684 
    685 	/* if it was not addressed to us, but the route doesn't go out
    686 	   the source interface, pick an address on the source
    687 	   interface.  This can happen when routing is asymmetric, or
    688 	   when the incoming packet was encapsulated */
    689 	if (sin == (struct sockaddr_in *)0) {
    690 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    691 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    692 			if (ifa->ifa_addr->sa_family != AF_INET)
    693 				continue;
    694 			sin = &(ifatoia(ifa)->ia_addr);
    695 			break;
    696 		}
    697 	}
    698 
    699 	/*
    700 	 * The following happens if the packet was not addressed to us,
    701 	 * and was received on an interface with no IP address:
    702 	 * We find the first AF_INET address on the first non-loopback
    703 	 * interface.
    704 	 */
    705 	if (sin == (struct sockaddr_in *)0)
    706 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    707 		    ia = ia->ia_list.tqe_next) {
    708 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    709 				continue;
    710 			sin = &ia->ia_addr;
    711 			break;
    712 		}
    713 
    714 	/*
    715 	 * If we still didn't find an address, punt.  We could have an
    716 	 * interface up (and receiving packets) with no address.
    717 	 */
    718 	if (sin == (struct sockaddr_in *)0) {
    719 		m_freem(m);
    720 		goto done;
    721 	}
    722 
    723 	ip->ip_src = sin->sin_addr;
    724 	ip->ip_ttl = MAXTTL;
    725 
    726 	if (optlen > 0) {
    727 		u_char *cp;
    728 		int opt, cnt;
    729 		u_int len;
    730 
    731 		/*
    732 		 * Retrieve any source routing from the incoming packet;
    733 		 * add on any record-route or timestamp options.
    734 		 */
    735 		cp = (u_char *) (ip + 1);
    736 		if ((opts = ip_srcroute()) == 0 &&
    737 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    738 			opts->m_len = sizeof(struct in_addr);
    739 			*mtod(opts, struct in_addr *) = zeroin_addr;
    740 		}
    741 		if (opts) {
    742 #ifdef ICMPPRINTFS
    743 		    if (icmpprintfs)
    744 			    printf("icmp_reflect optlen %d rt %d => ",
    745 				optlen, opts->m_len);
    746 #endif
    747 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    748 			    opt = cp[IPOPT_OPTVAL];
    749 			    if (opt == IPOPT_EOL)
    750 				    break;
    751 			    if (opt == IPOPT_NOP)
    752 				    len = 1;
    753 			    else {
    754 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    755 					    break;
    756 				    len = cp[IPOPT_OLEN];
    757 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    758 				        len > cnt)
    759 					    break;
    760 			    }
    761 			    /*
    762 			     * Should check for overflow, but it "can't happen"
    763 			     */
    764 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    765 				opt == IPOPT_SECURITY) {
    766 				    bcopy((caddr_t)cp,
    767 					mtod(opts, caddr_t) + opts->m_len, len);
    768 				    opts->m_len += len;
    769 			    }
    770 		    }
    771 		    /* Terminate & pad, if necessary */
    772 		    if ((cnt = opts->m_len % 4) != 0) {
    773 			    for (; cnt < 4; cnt++) {
    774 				    *(mtod(opts, caddr_t) + opts->m_len) =
    775 					IPOPT_EOL;
    776 				    opts->m_len++;
    777 			    }
    778 		    }
    779 #ifdef ICMPPRINTFS
    780 		    if (icmpprintfs)
    781 			    printf("%d\n", opts->m_len);
    782 #endif
    783 		}
    784 		/*
    785 		 * Now strip out original options by copying rest of first
    786 		 * mbuf's data back, and adjust the IP length.
    787 		 */
    788 		ip->ip_len -= optlen;
    789 		ip->ip_hl = sizeof(struct ip) >> 2;
    790 		m->m_len -= optlen;
    791 		if (m->m_flags & M_PKTHDR)
    792 			m->m_pkthdr.len -= optlen;
    793 		optlen += sizeof(struct ip);
    794 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    795 			 (unsigned)(m->m_len - sizeof(struct ip)));
    796 	}
    797 	m->m_flags &= ~(M_BCAST|M_MCAST);
    798 	icmp_send(m, opts);
    799 done:
    800 	if (opts)
    801 		(void)m_free(opts);
    802 }
    803 
    804 /*
    805  * Send an icmp packet back to the ip level,
    806  * after supplying a checksum.
    807  */
    808 void
    809 icmp_send(m, opts)
    810 	struct mbuf *m;
    811 	struct mbuf *opts;
    812 {
    813 	struct ip *ip = mtod(m, struct ip *);
    814 	int hlen;
    815 	struct icmp *icp;
    816 
    817 	hlen = ip->ip_hl << 2;
    818 	m->m_data += hlen;
    819 	m->m_len -= hlen;
    820 	icp = mtod(m, struct icmp *);
    821 	icp->icmp_cksum = 0;
    822 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    823 	m->m_data -= hlen;
    824 	m->m_len += hlen;
    825 #ifdef ICMPPRINTFS
    826 	if (icmpprintfs)
    827 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    828 #endif
    829 #ifdef IPSEC
    830 	/* Don't lookup socket */
    831 	(void)ipsec_setsocket(m, NULL);
    832 #endif
    833 	(void) ip_output(m, opts, NULL, 0, NULL);
    834 }
    835 
    836 n_time
    837 iptime()
    838 {
    839 	struct timeval atv;
    840 	u_long t;
    841 
    842 	microtime(&atv);
    843 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    844 	return (htonl(t));
    845 }
    846 
    847 int
    848 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    849 	int *name;
    850 	u_int namelen;
    851 	void *oldp;
    852 	size_t *oldlenp;
    853 	void *newp;
    854 	size_t newlen;
    855 {
    856 	int arg, error;
    857 
    858 	/* All sysctl names at this level are terminal. */
    859 	if (namelen != 1)
    860 		return (ENOTDIR);
    861 
    862 	switch (name[0])
    863 	{
    864 	case ICMPCTL_MASKREPL:
    865 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    866 		break;
    867 	case ICMPCTL_RETURNDATABYTES:
    868 		arg = icmpreturndatabytes;
    869 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    870 		if (error)
    871 			break;
    872 		if ((arg >= 8) || (arg <= 512))
    873 			icmpreturndatabytes = arg;
    874 		else
    875 			error = EINVAL;
    876 		break;
    877 	case ICMPCTL_ERRPPSLIMIT:
    878 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    879 		break;
    880 	default:
    881 		error = ENOPROTOOPT;
    882 		break;
    883 	}
    884 	return error;
    885 }
    886 
    887 void
    888 icmp_mtudisc(icp, faddr)
    889 	struct icmp *icp;
    890 	struct in_addr faddr;
    891 {
    892 	struct icmp_mtudisc_callback *mc;
    893 	struct sockaddr *dst = sintosa(&icmpsrc);
    894 	struct rtentry *rt;
    895 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    896 	int    error;
    897 
    898 	/* Table of common MTUs: */
    899 
    900 	static const u_int mtu_table[] = { 65535, 65280, 32000, 17914, 9180,
    901 	    8166, 4352, 2002, 1492, 1006, 508, 296, 68, 0};
    902 
    903 	rt = rtalloc1(dst, 1);
    904 	if (rt == 0)
    905 		return;
    906 
    907 	/* If we didn't get a host route, allocate one */
    908 
    909 	if ((rt->rt_flags & RTF_HOST) == 0) {
    910 		struct rtentry *nrt;
    911 
    912 		error = rtrequest((int) RTM_ADD, dst,
    913 		    (struct sockaddr *) rt->rt_gateway,
    914 		    (struct sockaddr *) 0,
    915 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    916 		if (error) {
    917 			rtfree(rt);
    918 			return;
    919 		}
    920 		nrt->rt_rmx = rt->rt_rmx;
    921 		rtfree(rt);
    922 		rt = nrt;
    923 	}
    924 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    925 	if (error) {
    926 		rtfree(rt);
    927 		return;
    928 	}
    929 
    930 	if (mtu == 0) {
    931 		int i = 0;
    932 
    933 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    934 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    935 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    936 			mtu -= (icp->icmp_ip.ip_hl << 2);
    937 
    938 		/* If we still can't guess a value, try the route */
    939 
    940 		if (mtu == 0) {
    941 			mtu = rt->rt_rmx.rmx_mtu;
    942 
    943 			/* If no route mtu, default to the interface mtu */
    944 
    945 			if (mtu == 0)
    946 				mtu = rt->rt_ifp->if_mtu;
    947 		}
    948 
    949 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    950 			if (mtu > mtu_table[i]) {
    951 				mtu = mtu_table[i];
    952 				break;
    953 			}
    954 	}
    955 
    956 	/*
    957 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    958 	 *	  to turn off PMTU for a route, and the kernel can
    959 	 *	  set it to indicate a serious problem with PMTU
    960 	 *	  on a route.  We should be using a separate flag
    961 	 *	  for the kernel to indicate this.
    962 	 */
    963 
    964 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    965 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    966 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    967 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    968 			 rt->rt_rmx.rmx_mtu == 0) {
    969 			icmpstat.icps_pmtuchg++;
    970 			rt->rt_rmx.rmx_mtu = mtu;
    971 		}
    972 	}
    973 
    974 	if (rt)
    975 		rtfree(rt);
    976 
    977 	/*
    978 	 * Notify protocols that the MTU for this destination
    979 	 * has changed.
    980 	 */
    981 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    982 	     mc = LIST_NEXT(mc, mc_list))
    983 		(*mc->mc_func)(faddr);
    984 }
    985 
/*
 * Step from MTU to a neighbouring plateau (table from RFC 1191).
 *
 * dir <  0: return the next larger plateau, or 0 if MTU is already at
 *           or above the largest table entry.
 * dir >= 0: return the next smaller plateau, or 0 if there is none.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Descending plateau values; the trailing 0 is a sentinel. */
	static const unsigned int plateaus[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const unsigned int nplateaus = sizeof(plateaus) / sizeof(plateaus[0]);
	unsigned int pos = 0;

	/* Find the first plateau that does not exceed mtu. */
	while (pos < nplateaus && mtu < plateaus[pos])
		pos++;

	if (dir < 0)
		return (pos == 0) ? 0 : plateaus[pos - 1];

	/* Hit the sentinel: nothing smaller is available. */
	if (plateaus[pos] == 0)
		return 0;

	/*
	 * If mtu lies strictly above the plateau found, that plateau is
	 * the next smaller value; if it sits exactly on it, step down one.
	 */
	return (mtu > plateaus[pos]) ? plateaus[pos] : plateaus[pos + 1];
}
   1023 
   1024 static void
   1025 icmp_mtudisc_timeout(rt, r)
   1026 	struct rtentry *rt;
   1027 	struct rttimer *r;
   1028 {
   1029 	if (rt == NULL)
   1030 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1031 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1032 	    (RTF_DYNAMIC | RTF_HOST)) {
   1033 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1034 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1035 	} else {
   1036 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1037 			rt->rt_rmx.rmx_mtu = 0;
   1038 		}
   1039 	}
   1040 }
   1041 
   1042 /*
   1043  * Perform rate limit check.
   1044  * Returns 0 if it is okay to send the icmp packet.
   1045  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1046  * limitation.
   1047  *
   1048  * XXX per-destination/type check necessary?
   1049  */
   1050 static int
   1051 icmp_ratelimit(dst, type, code)
   1052 	const struct in_addr *dst;
   1053 	const int type;			/* not used at this moment */
   1054 	const int code;			/* not used at this moment */
   1055 {
   1056 
   1057 	/* PPS limit */
   1058 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1059 	    icmperrppslim)) {
   1060 		/* The packet is subject to rate limit */
   1061 		return 1;
   1062 	}
   1063 
   1064 	/*okay to send*/
   1065 	return 0;
   1066 }
   1067