Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.47.2.5
      1 /*	$NetBSD: ip_icmp.c,v 1.47.2.5 2001/04/06 00:24:30 he Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <vm/vm.h>
    117 #include <sys/sysctl.h>
    118 
    119 #include <net/if.h>
    120 #include <net/route.h>
    121 
    122 #include <netinet/in.h>
    123 #include <netinet/in_systm.h>
    124 #include <netinet/in_var.h>
    125 #include <netinet/ip.h>
    126 #include <netinet/ip_icmp.h>
    127 #include <netinet/ip_var.h>
    128 #include <netinet/in_pcb.h>
    129 #include <netinet/icmp_var.h>
    130 
    131 #ifdef IPSEC
    132 #include <netinet6/ipsec.h>
    133 #include <netkey/key.h>
    134 #include <netkey/key_debug.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turnaround packets back to the originator, and
    142  * host table maintenance routines.
    143  */
    144 
    145 int	icmpmaskrepl = 0;
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;
    148 #endif
    149 int	icmpreturndatabytes = 8;
    150 
    151 #if 0
    152 static int	ip_next_mtu __P((int, int));
    153 #else
    154 /*static*/ int	ip_next_mtu __P((int, int));
    155 #endif
    156 
    157 extern int icmperrppslim;
    158 static int icmperrpps_count = 0;
    159 static struct timeval icmperrppslim_last;
    160 
    161 static void icmp_mtudisc __P((struct icmp *));
    162 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    163 
    164 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    165 
    166 /*
    167  * Generate an error packet of type error
    168  * in response to bad packet ip.
    169  */
    170 void
    171 icmp_error(n, type, code, dest, destifp)
    172 	struct mbuf *n;
    173 	int type, code;
    174 	n_long dest;
    175 	struct ifnet *destifp;
    176 {
    177 	struct ip *oip = mtod(n, struct ip *), *nip;
    178 	unsigned oiplen = oip->ip_hl << 2;
    179 	struct icmp *icp;
    180 	struct mbuf *m;
    181 	unsigned icmplen, mblen;
    182 
    183 #ifdef ICMPPRINTFS
    184 	if (icmpprintfs)
    185 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    186 #endif
    187 	if (type != ICMP_REDIRECT)
    188 		icmpstat.icps_error++;
    189 	/*
    190 	 * Don't send error if the original packet was encrypted.
    191 	 * Don't send error if not the first fragment of message.
    192 	 * Don't error if the old packet protocol was ICMP
    193 	 * error message, only known informational types.
    194 	 */
    195 	if (n->m_flags & M_DECRYPTED)
    196 		goto freeit;
    197 	if (oip->ip_off &~ (IP_MF|IP_DF))
    198 		goto freeit;
    199 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    200 	  n->m_len >= oiplen + ICMP_MINLEN &&
    201 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    202 		icmpstat.icps_oldicmp++;
    203 		goto freeit;
    204 	}
    205 	/* Don't send error in response to a multicast or broadcast packet */
    206 	if (n->m_flags & (M_BCAST|M_MCAST))
    207 		goto freeit;
    208 
    209 	/*
    210 	 * First, do a rate limitation check.
    211 	 */
    212 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    213 		/* XXX stat */
    214 		goto freeit;
    215 	}
    216 
    217 	/*
    218 	 * Now, formulate icmp message
    219 	 */
    220 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    221 	/*
    222 	 * Defend against mbuf chains shorter than oip->ip_len:
    223 	 */
    224 	mblen = 0;
    225 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    226 		mblen += m->m_len;
    227 	icmplen = min(mblen, icmplen);
    228 
    229 	/*
    230 	 * As we are not required to return everything we have,
    231 	 * we return whatever we can return at ease.
    232 	 *
    233 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    234 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    235 	 * icmp_sysctl will keep things below that limit.
    236 	 */
    237 
    238 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    239 
    240 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    241 		icmplen = MCLBYTES - ICMP_MINLEN;
    242 
    243 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    244 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    245 		MCLGET(m, M_DONTWAIT);
    246 		if ((m->m_flags & M_EXT) == 0) {
    247 			m_freem(m);
    248 			m = NULL;
    249 		}
    250 	}
    251 	if (m == NULL)
    252 		goto freeit;
    253 	m->m_len = icmplen + ICMP_MINLEN;
    254 	if ((m->m_flags & M_EXT) == 0)
    255 		MH_ALIGN(m, m->m_len);
    256 	icp = mtod(m, struct icmp *);
    257 	if ((u_int)type > ICMP_MAXTYPE)
    258 		panic("icmp_error");
    259 	icmpstat.icps_outhist[type]++;
    260 	icp->icmp_type = type;
    261 	if (type == ICMP_REDIRECT)
    262 		icp->icmp_gwaddr.s_addr = dest;
    263 	else {
    264 		icp->icmp_void = 0;
    265 		/*
    266 		 * The following assignments assume an overlay with the
    267 		 * zeroed icmp_void field.
    268 		 */
    269 		if (type == ICMP_PARAMPROB) {
    270 			icp->icmp_pptr = code;
    271 			code = 0;
    272 		} else if (type == ICMP_UNREACH &&
    273 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    274 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    275 	}
    276 
    277 	HTONS(oip->ip_off);
    278 	HTONS(oip->ip_len);
    279 	icp->icmp_code = code;
    280 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    281 	nip = &icp->icmp_ip;
    282 
    283 	/*
    284 	 * Now, copy old ip header (without options)
    285 	 * in front of icmp message.
    286 	 */
    287 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    288 		panic("icmp len");
    289 	m->m_data -= sizeof(struct ip);
    290 	m->m_len += sizeof(struct ip);
    291 	m->m_pkthdr.len = m->m_len;
    292 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    293 	nip = mtod(m, struct ip *);
    294 	/* ip_v set in ip_output */
    295 	nip->ip_hl = sizeof(struct ip) >> 2;
    296 	nip->ip_tos = 0;
    297 	nip->ip_len = m->m_len;
    298 	/* ip_id set in ip_output */
    299 	nip->ip_off = 0;
    300 	/* ip_ttl set in icmp_reflect */
    301 	nip->ip_p = IPPROTO_ICMP;
    302 	nip->ip_src = oip->ip_src;
    303 	nip->ip_dst = oip->ip_dst;
    304 	icmp_reflect(m);
    305 
    306 freeit:
    307 	m_freem(n);
    308 }
    309 
    310 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    311 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    312 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    313 struct sockaddr_in icmpmask = { 8, 0 };
    314 
    315 /*
    316  * Process a received ICMP message.
    317  */
    318 void
    319 #if __STDC__
    320 icmp_input(struct mbuf *m, ...)
    321 #else
    322 icmp_input(m, va_alist)
    323 	struct mbuf *m;
    324 	va_dcl
    325 #endif
    326 {
    327 	int proto;
    328 	struct icmp *icp;
    329 	struct ip *ip = mtod(m, struct ip *);
    330 	int icmplen;
    331 	int i;
    332 	struct in_ifaddr *ia;
    333 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    334 	int code;
    335 	int hlen;
    336 	va_list ap;
    337 
    338 	va_start(ap, m);
    339 	hlen = va_arg(ap, int);
    340 	proto = va_arg(ap, int);
    341 	va_end(ap);
    342 
    343 	/*
    344 	 * Locate icmp structure in mbuf, and check
    345 	 * that not corrupted and of at least minimum length.
    346 	 */
    347 	icmplen = ip->ip_len - hlen;
    348 #ifdef ICMPPRINTFS
    349 	if (icmpprintfs)
    350 		printf("icmp_input from %x to %x, len %d\n",
    351 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    352 		    icmplen);
    353 #endif
    354 	if (icmplen < ICMP_MINLEN) {
    355 		icmpstat.icps_tooshort++;
    356 		goto freeit;
    357 	}
    358 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    359 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    360 		icmpstat.icps_tooshort++;
    361 		return;
    362 	}
    363 	ip = mtod(m, struct ip *);
    364 	m->m_len -= hlen;
    365 	m->m_data += hlen;
    366 	icp = mtod(m, struct icmp *);
    367 	if (in_cksum(m, icmplen)) {
    368 		icmpstat.icps_checksum++;
    369 		goto freeit;
    370 	}
    371 	m->m_len += hlen;
    372 	m->m_data -= hlen;
    373 
    374 #ifdef ICMPPRINTFS
    375 	/*
    376 	 * Message type specific processing.
    377 	 */
    378 	if (icmpprintfs)
    379 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    380 		    icp->icmp_code);
    381 #endif
    382 	if (icp->icmp_type > ICMP_MAXTYPE)
    383 		goto raw;
    384 	icmpstat.icps_inhist[icp->icmp_type]++;
    385 	code = icp->icmp_code;
    386 	switch (icp->icmp_type) {
    387 
    388 	case ICMP_UNREACH:
    389 		switch (code) {
    390 			case ICMP_UNREACH_NET:
    391 			case ICMP_UNREACH_HOST:
    392 			case ICMP_UNREACH_PROTOCOL:
    393 			case ICMP_UNREACH_PORT:
    394 			case ICMP_UNREACH_SRCFAIL:
    395 				code += PRC_UNREACH_NET;
    396 				break;
    397 
    398 			case ICMP_UNREACH_NEEDFRAG:
    399 				code = PRC_MSGSIZE;
    400 				break;
    401 
    402 			case ICMP_UNREACH_NET_UNKNOWN:
    403 			case ICMP_UNREACH_NET_PROHIB:
    404 			case ICMP_UNREACH_TOSNET:
    405 				code = PRC_UNREACH_NET;
    406 				break;
    407 
    408 			case ICMP_UNREACH_HOST_UNKNOWN:
    409 			case ICMP_UNREACH_ISOLATED:
    410 			case ICMP_UNREACH_HOST_PROHIB:
    411 			case ICMP_UNREACH_TOSHOST:
    412 				code = PRC_UNREACH_HOST;
    413 				break;
    414 
    415 			default:
    416 				goto badcode;
    417 		}
    418 		goto deliver;
    419 
    420 	case ICMP_TIMXCEED:
    421 		if (code > 1)
    422 			goto badcode;
    423 		code += PRC_TIMXCEED_INTRANS;
    424 		goto deliver;
    425 
    426 	case ICMP_PARAMPROB:
    427 		if (code > 1)
    428 			goto badcode;
    429 		code = PRC_PARAMPROB;
    430 		goto deliver;
    431 
    432 	case ICMP_SOURCEQUENCH:
    433 		if (code)
    434 			goto badcode;
    435 		code = PRC_QUENCH;
    436 		goto deliver;
    437 
    438 	deliver:
    439 		/*
    440 		 * Problem with datagram; advise higher level routines.
    441 		 */
    442 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    443 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    444 			icmpstat.icps_badlen++;
    445 			goto freeit;
    446 		}
    447 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    448 			goto badcode;
    449 		NTOHS(icp->icmp_ip.ip_len);
    450 #ifdef ICMPPRINTFS
    451 		if (icmpprintfs)
    452 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    453 #endif
    454 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    455 		if (code == PRC_MSGSIZE && ip_mtudisc)
    456 			icmp_mtudisc(icp);
    457 		/*
    458 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    459 		 * notification to TCP layer.
    460 		 */
    461 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    462 		if (ctlfunc)
    463 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
    464 		break;
    465 
    466 	badcode:
    467 		icmpstat.icps_badcode++;
    468 		break;
    469 
    470 	case ICMP_ECHO:
    471 		icp->icmp_type = ICMP_ECHOREPLY;
    472 		goto reflect;
    473 
    474 	case ICMP_TSTAMP:
    475 		if (icmplen < ICMP_TSLEN) {
    476 			icmpstat.icps_badlen++;
    477 			break;
    478 		}
    479 		icp->icmp_type = ICMP_TSTAMPREPLY;
    480 		icp->icmp_rtime = iptime();
    481 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    482 		goto reflect;
    483 
    484 	case ICMP_MASKREQ:
    485 		if (icmpmaskrepl == 0)
    486 			break;
    487 		/*
    488 		 * We are not able to respond with all ones broadcast
    489 		 * unless we receive it over a point-to-point interface.
    490 		 */
    491 		if (icmplen < ICMP_MASKLEN) {
    492 			icmpstat.icps_badlen++;
    493 			break;
    494 		}
    495 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    496 		    in_nullhost(ip->ip_dst))
    497 			icmpdst.sin_addr = ip->ip_src;
    498 		else
    499 			icmpdst.sin_addr = ip->ip_dst;
    500 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    501 		    m->m_pkthdr.rcvif));
    502 		if (ia == 0)
    503 			break;
    504 		icp->icmp_type = ICMP_MASKREPLY;
    505 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    506 		if (in_nullhost(ip->ip_src)) {
    507 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    508 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    509 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    510 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    511 		}
    512 reflect:
    513 		icmpstat.icps_reflect++;
    514 		icmpstat.icps_outhist[icp->icmp_type]++;
    515 		icmp_reflect(m);
    516 		return;
    517 
    518 	case ICMP_REDIRECT:
    519 		if (code > 3)
    520 			goto badcode;
    521 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    522 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    523 			icmpstat.icps_badlen++;
    524 			break;
    525 		}
    526 		/*
    527 		 * Short circuit routing redirects to force
    528 		 * immediate change in the kernel's routing
    529 		 * tables.  The message is also handed to anyone
    530 		 * listening on a raw socket (e.g. the routing
    531 		 * daemon for use in updating its tables).
    532 		 */
    533 		icmpgw.sin_addr = ip->ip_src;
    534 		icmpdst.sin_addr = icp->icmp_gwaddr;
    535 #ifdef	ICMPPRINTFS
    536 		if (icmpprintfs)
    537 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    538 			    icp->icmp_gwaddr);
    539 #endif
    540 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    541 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    542 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    543 		    sintosa(&icmpgw), (struct rtentry **)0);
    544 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    545 #ifdef IPSEC
    546 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    547 #endif
    548 		break;
    549 
    550 	/*
    551 	 * No kernel processing for the following;
    552 	 * just fall through to send to raw listener.
    553 	 */
    554 	case ICMP_ECHOREPLY:
    555 	case ICMP_ROUTERADVERT:
    556 	case ICMP_ROUTERSOLICIT:
    557 	case ICMP_TSTAMPREPLY:
    558 	case ICMP_IREQREPLY:
    559 	case ICMP_MASKREPLY:
    560 	default:
    561 		break;
    562 	}
    563 
    564 raw:
    565 	rip_input(m, hlen, proto);
    566 	return;
    567 
    568 freeit:
    569 	m_freem(m);
    570 	return;
    571 }
    572 
    573 /*
    574  * Reflect the ip packet back to the source
    575  */
    576 void
    577 icmp_reflect(m)
    578 	struct mbuf *m;
    579 {
    580 	struct ip *ip = mtod(m, struct ip *);
    581 	struct in_ifaddr *ia;
    582 	struct ifaddr *ifa;
    583 	struct sockaddr_in *sin = 0;
    584 	struct in_addr t;
    585 	struct mbuf *opts = 0;
    586 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    587 
    588 	if (!in_canforward(ip->ip_src) &&
    589 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    590 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    591 		m_freem(m);	/* Bad return address */
    592 		goto done;	/* ip_output() will check for broadcast */
    593 	}
    594 	t = ip->ip_dst;
    595 	ip->ip_dst = ip->ip_src;
    596 	/*
    597 	 * If the incoming packet was addressed directly to us, use
    598 	 * dst as the src for the reply.  Otherwise (broadcast or
    599 	 * anonymous), use an address which corresponds to the
    600 	 * incoming interface, with a preference for the address which
    601 	 * corresponds to the route to the destination of the ICMP.
    602 	 */
    603 
    604 	/* Look for packet addressed to us */
    605 	INADDR_TO_IA(t, ia);
    606 
    607 	/* look for packet sent to broadcast address */
    608 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    609 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    610 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    611 			if (ifa->ifa_addr->sa_family != AF_INET)
    612 				continue;
    613 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    614 				ia = ifatoia(ifa);
    615 				break;
    616 			}
    617 		}
    618 	}
    619 
    620 	if (ia)
    621 		sin = &ia->ia_addr;
    622 
    623 	icmpdst.sin_addr = t;
    624 
    625 	/* if the packet is addressed somewhere else, compute the
    626 	   source address for packets routed back to the source, and
    627 	   use that, if it's an address on the interface which
    628 	   received the packet */
    629 	if (sin == (struct sockaddr_in *)0) {
    630 		struct sockaddr_in sin_dst;
    631 		struct route icmproute;
    632 		int errornum;
    633 
    634 		sin_dst.sin_family = AF_INET;
    635 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    636 		sin_dst.sin_addr = ip->ip_dst;
    637 		bzero(&icmproute, sizeof(icmproute));
    638 		errornum = 0;
    639 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    640 		/* errornum is never used */
    641 		if (icmproute.ro_rt)
    642 			RTFREE(icmproute.ro_rt);
    643 		/* check to make sure sin is a source address on rcvif */
    644 		if (sin) {
    645 			t = sin->sin_addr;
    646 			sin = (struct sockaddr_in *)0;
    647 			INADDR_TO_IA(t, ia);
    648 			while (ia) {
    649 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    650 					sin = &ia->ia_addr;
    651 					break;
    652 				}
    653 				NEXT_IA_WITH_SAME_ADDR(ia);
    654 			}
    655 		}
    656 	}
    657 
    658 	/* if it was not addressed to us, but the route doesn't go out
    659 	   the source interface, pick an address on the source
    660 	   interface.  This can happen when routing is asymmetric, or
    661 	   when the incoming packet was encapsulated */
    662 	if (sin == (struct sockaddr_in *)0) {
    663 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    664 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    665 			if (ifa->ifa_addr->sa_family != AF_INET)
    666 				continue;
    667 			sin = &(ifatoia(ifa)->ia_addr);
    668 			break;
    669 		}
    670 	}
    671 
    672 	/*
    673 	 * The following happens if the packet was not addressed to us,
    674 	 * and was received on an interface with no IP address:
    675 	 * We find the first AF_INET address on the first non-loopback
    676 	 * interface.
    677 	 */
    678 	if (sin == (struct sockaddr_in *)0)
    679 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    680 		    ia = ia->ia_list.tqe_next) {
    681 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    682 				continue;
    683 			sin = &ia->ia_addr;
    684 			break;
    685 		}
    686 
    687 	/*
    688 	 * If we still didn't find an address, punt.  We could have an
    689 	 * interface up (and receiving packets) with no address.
    690 	 */
    691 	if (sin == (struct sockaddr_in *)0) {
    692 		m_freem(m);
    693 		goto done;
    694 	}
    695 
    696 	ip->ip_src = sin->sin_addr;
    697 	ip->ip_ttl = MAXTTL;
    698 
    699 	if (optlen > 0) {
    700 		u_char *cp;
    701 		int opt, cnt;
    702 		u_int len;
    703 
    704 		/*
    705 		 * Retrieve any source routing from the incoming packet;
    706 		 * add on any record-route or timestamp options.
    707 		 */
    708 		cp = (u_char *) (ip + 1);
    709 		if ((opts = ip_srcroute()) == 0 &&
    710 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    711 			opts->m_len = sizeof(struct in_addr);
    712 			*mtod(opts, struct in_addr *) = zeroin_addr;
    713 		}
    714 		if (opts) {
    715 #ifdef ICMPPRINTFS
    716 		    if (icmpprintfs)
    717 			    printf("icmp_reflect optlen %d rt %d => ",
    718 				optlen, opts->m_len);
    719 #endif
    720 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    721 			    opt = cp[IPOPT_OPTVAL];
    722 			    if (opt == IPOPT_EOL)
    723 				    break;
    724 			    if (opt == IPOPT_NOP)
    725 				    len = 1;
    726 			    else {
    727 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    728 					    break;
    729 				    len = cp[IPOPT_OLEN];
    730 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    731 				        len > cnt)
    732 					    break;
    733 			    }
    734 			    /*
    735 			     * Should check for overflow, but it "can't happen"
    736 			     */
    737 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    738 				opt == IPOPT_SECURITY) {
    739 				    bcopy((caddr_t)cp,
    740 					mtod(opts, caddr_t) + opts->m_len, len);
    741 				    opts->m_len += len;
    742 			    }
    743 		    }
    744 		    /* Terminate & pad, if necessary */
    745 		    if ((cnt = opts->m_len % 4) != 0) {
    746 			    for (; cnt < 4; cnt++) {
    747 				    *(mtod(opts, caddr_t) + opts->m_len) =
    748 					IPOPT_EOL;
    749 				    opts->m_len++;
    750 			    }
    751 		    }
    752 #ifdef ICMPPRINTFS
    753 		    if (icmpprintfs)
    754 			    printf("%d\n", opts->m_len);
    755 #endif
    756 		}
    757 		/*
    758 		 * Now strip out original options by copying rest of first
    759 		 * mbuf's data back, and adjust the IP length.
    760 		 */
    761 		ip->ip_len -= optlen;
    762 		ip->ip_hl = sizeof(struct ip) >> 2;
    763 		m->m_len -= optlen;
    764 		if (m->m_flags & M_PKTHDR)
    765 			m->m_pkthdr.len -= optlen;
    766 		optlen += sizeof(struct ip);
    767 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    768 			 (unsigned)(m->m_len - sizeof(struct ip)));
    769 	}
    770 	m->m_flags &= ~(M_BCAST|M_MCAST);
    771 	icmp_send(m, opts);
    772 done:
    773 	if (opts)
    774 		(void)m_free(opts);
    775 }
    776 
    777 /*
    778  * Send an icmp packet back to the ip level,
    779  * after supplying a checksum.
    780  */
    781 void
    782 icmp_send(m, opts)
    783 	struct mbuf *m;
    784 	struct mbuf *opts;
    785 {
    786 	struct ip *ip = mtod(m, struct ip *);
    787 	int hlen;
    788 	struct icmp *icp;
    789 
    790 	hlen = ip->ip_hl << 2;
    791 	m->m_data += hlen;
    792 	m->m_len -= hlen;
    793 	icp = mtod(m, struct icmp *);
    794 	icp->icmp_cksum = 0;
    795 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    796 	m->m_data -= hlen;
    797 	m->m_len += hlen;
    798 #ifdef ICMPPRINTFS
    799 	if (icmpprintfs)
    800 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    801 #endif
    802 #ifdef IPSEC
    803 	/* Don't lookup socket */
    804 	(void)ipsec_setsocket(m, NULL);
    805 #endif
    806 	(void) ip_output(m, opts, NULL, 0, NULL);
    807 }
    808 
    809 n_time
    810 iptime()
    811 {
    812 	struct timeval atv;
    813 	u_long t;
    814 
    815 	microtime(&atv);
    816 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    817 	return (htonl(t));
    818 }
    819 
    820 int
    821 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    822 	int *name;
    823 	u_int namelen;
    824 	void *oldp;
    825 	size_t *oldlenp;
    826 	void *newp;
    827 	size_t newlen;
    828 {
    829 	int arg, error;
    830 
    831 	/* All sysctl names at this level are terminal. */
    832 	if (namelen != 1)
    833 		return (ENOTDIR);
    834 
    835 	switch (name[0])
    836 	{
    837 	case ICMPCTL_MASKREPL:
    838 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    839 		break;
    840 	case ICMPCTL_RETURNDATABYTES:
    841 		arg = icmpreturndatabytes;
    842 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    843 		if (error)
    844 			break;
    845 		if ((arg >= 8) || (arg <= 512))
    846 			icmpreturndatabytes = arg;
    847 		else
    848 			error = EINVAL;
    849 		break;
    850 	case ICMPCTL_ERRPPSLIMIT:
    851 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    852 		break;
    853 	default:
    854 		error = ENOPROTOOPT;
    855 		break;
    856 	}
    857 	return error;
    858 }
    859 
    860 static void
    861 icmp_mtudisc(icp)
    862 	struct icmp *icp;
    863 {
    864 	struct rtentry *rt;
    865 	struct sockaddr *dst = sintosa(&icmpsrc);
    866 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    867 	int    error;
    868 
    869 	/* Table of common MTUs: */
    870 
    871 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    872 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    873 
    874 	rt = rtalloc1(dst, 1);
    875 	if (rt == 0)
    876 		return;
    877 
    878 	/* If we didn't get a host route, allocate one */
    879 
    880 	if ((rt->rt_flags & RTF_HOST) == 0) {
    881 		struct rtentry *nrt;
    882 
    883 		error = rtrequest((int) RTM_ADD, dst,
    884 		    (struct sockaddr *) rt->rt_gateway,
    885 		    (struct sockaddr *) 0,
    886 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    887 		if (error) {
    888 			rtfree(rt);
    889 			rtfree(nrt);
    890 			return;
    891 		}
    892 		nrt->rt_rmx = rt->rt_rmx;
    893 		rtfree(rt);
    894 		rt = nrt;
    895 	}
    896 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    897 	if (error) {
    898 		rtfree(rt);
    899 		return;
    900 	}
    901 
    902 	if (mtu == 0) {
    903 		int i = 0;
    904 
    905 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    906 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    907 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    908 			mtu -= (icp->icmp_ip.ip_hl << 2);
    909 
    910 		/* If we still can't guess a value, try the route */
    911 
    912 		if (mtu == 0) {
    913 			mtu = rt->rt_rmx.rmx_mtu;
    914 
    915 			/* If no route mtu, default to the interface mtu */
    916 
    917 			if (mtu == 0)
    918 				mtu = rt->rt_ifp->if_mtu;
    919 		}
    920 
    921 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    922 			if (mtu > mtu_table[i]) {
    923 				mtu = mtu_table[i];
    924 				break;
    925 			}
    926 	}
    927 
    928 	/*
    929 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    930 	 *	  to turn off PMTU for a route, and the kernel can
    931 	 *	  set it to indicate a serious problem with PMTU
    932 	 *	  on a route.  We should be using a separate flag
    933 	 *	  for the kernel to indicate this.
    934 	 */
    935 
    936 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    937 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    938 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    939 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    940 			 rt->rt_rmx.rmx_mtu == 0)
    941 			rt->rt_rmx.rmx_mtu = mtu;
    942 	}
    943 
    944 	if (rt)
    945 		rtfree(rt);
    946 }
    947 
    948 /*
    949  * Return the next larger or smaller MTU plateau (table from RFC 1191)
    950  * given current value MTU.  If DIR is less than zero, a larger plateau
    951  * is returned; otherwise, a smaller value is returned.
    952  */
    953 int
    954 ip_next_mtu(mtu, dir)	/* XXX */
    955 	int mtu;
    956 	int dir;
    957 {
    958 	static int mtutab[] = {
    959 		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
    960 		68, 0
    961 	};
    962 	int i;
    963 
    964 	for (i = 0; i < (sizeof mtutab) / (sizeof mtutab[0]); i++) {
    965 		if (mtu >= mtutab[i])
    966 			break;
    967 	}
    968 
    969 	if (dir < 0) {
    970 		if (i == 0) {
    971 			return 0;
    972 		} else {
    973 			return mtutab[i - 1];
    974 		}
    975 	} else {
    976 		if (mtutab[i] == 0) {
    977 			return 0;
    978 		} else if(mtu > mtutab[i]) {
    979 			return mtutab[i];
    980 		} else {
    981 			return mtutab[i + 1];
    982 		}
    983 	}
    984 }
    985 
    986 static void
    987 icmp_mtudisc_timeout(rt, r)
    988 	struct rtentry *rt;
    989 	struct rttimer *r;
    990 {
    991 	if (rt == NULL)
    992 		panic("icmp_mtudisc_timeout:  bad route to timeout");
    993 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
    994 	    (RTF_DYNAMIC | RTF_HOST)) {
    995 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
    996 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
    997 	} else {
    998 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    999 			rt->rt_rmx.rmx_mtu = 0;
   1000 		}
   1001 	}
   1002 }
   1003 
   1004 /*
   1005  * Perform rate limit check.
   1006  * Returns 0 if it is okay to send the icmp packet.
   1007  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1008  * limitation.
   1009  *
   1010  * XXX per-destination/type check necessary?
   1011  */
   1012 static int
   1013 icmp_ratelimit(dst, type, code)
   1014 	const struct in_addr *dst;
   1015 	const int type;			/* not used at this moment */
   1016 	const int code;			/* not used at this moment */
   1017 {
   1018 
   1019 	/* PPS limit */
   1020 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1021 	    icmperrppslim)) {
   1022 		/* The packet is subject to rate limit */
   1023 		return 1;
   1024 	}
   1025 
   1026 	/*okay to send*/
   1027 	return 0;
   1028 }
   1029