Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.54
      1 /*	$NetBSD: ip_icmp.c,v 1.54 2000/07/28 04:06:53 itojun Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <uvm/uvm_extern.h>
    117 
    118 #include <sys/sysctl.h>
    119 
    120 #include <net/if.h>
    121 #include <net/route.h>
    122 
    123 #include <netinet/in.h>
    124 #include <netinet/in_systm.h>
    125 #include <netinet/in_var.h>
    126 #include <netinet/ip.h>
    127 #include <netinet/ip_icmp.h>
    128 #include <netinet/ip_var.h>
    129 #include <netinet/in_pcb.h>
    130 #include <netinet/icmp_var.h>
    131 
    132 #ifdef IPSEC
    133 #include <netinet6/ipsec.h>
    134 #include <netkey/key.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turnaround packets back to the originator, and
    142  * host table maintenance routines.
    143  */
    144 
/*
 * Run-time tunables.  icmpmaskrepl and icmpreturndatabytes are exported
 * through icmp_sysctl() below.
 */
/* When zero, icmp_input() does not answer ICMP_MASKREQ. */
int	icmpmaskrepl = 0;
#ifdef ICMPPRINTFS
/* Non-zero enables the debugging printf()s scattered through this file. */
int	icmpprintfs = 0;
#endif
/*
 * Upper bound on the number of bytes following the offending packet's
 * IP header that icmp_error() quotes in the error it generates.
 */
int	icmpreturndatabytes = 8;

/*
 * ip_next_mtu() is deliberately left non-static for now (see the
 * disabled static prototype).
 */
#if 0
static int	ip_next_mtu __P((int, int));
#else
/*static*/ int	ip_next_mtu __P((int, int));
#endif

/*
 * State for icmp_ratelimit(): the limit itself is defined elsewhere;
 * the counter and timestamp are handed to ppsratecheck().
 */
extern int icmperrppslim;
static int icmperrpps_count = 0;
static struct timeval icmperrppslim_last;

/* Path MTU discovery helpers. */
static void icmp_mtudisc __P((struct icmp *));
static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));

/* Error-generation rate limiter. */
static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    165 
    166 /*
    167  * Generate an error packet of type error
    168  * in response to bad packet ip.
    169  */
    170 void
    171 icmp_error(n, type, code, dest, destifp)
    172 	struct mbuf *n;
    173 	int type, code;
    174 	n_long dest;
    175 	struct ifnet *destifp;
    176 {
    177 	struct ip *oip = mtod(n, struct ip *), *nip;
    178 	unsigned oiplen = oip->ip_hl << 2;
    179 	struct icmp *icp;
    180 	struct mbuf *m;
    181 	unsigned icmplen, mblen;
    182 
    183 #ifdef ICMPPRINTFS
    184 	if (icmpprintfs)
    185 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    186 #endif
    187 	if (type != ICMP_REDIRECT)
    188 		icmpstat.icps_error++;
    189 	/*
    190 	 * Don't send error if the original packet was encrypted.
    191 	 * Don't send error if not the first fragment of message.
    192 	 * Don't error if the old packet protocol was ICMP
    193 	 * error message, only known informational types.
    194 	 */
    195 	if (n->m_flags & M_DECRYPTED)
    196 		goto freeit;
    197 	if (oip->ip_off &~ (IP_MF|IP_DF))
    198 		goto freeit;
    199 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    200 	  n->m_len >= oiplen + ICMP_MINLEN &&
    201 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    202 		icmpstat.icps_oldicmp++;
    203 		goto freeit;
    204 	}
    205 	/* Don't send error in response to a multicast or broadcast packet */
    206 	if (n->m_flags & (M_BCAST|M_MCAST))
    207 		goto freeit;
    208 
    209 	/*
    210 	 * First, do a rate limitation check.
    211 	 */
    212 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    213 		/* XXX stat */
    214 		goto freeit;
    215 	}
    216 
    217 	/*
    218 	 * Now, formulate icmp message
    219 	 */
    220 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    221 	/*
    222 	 * Defend against mbuf chains shorter than oip->ip_len:
    223 	 */
    224 	mblen = 0;
    225 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    226 		mblen += m->m_len;
    227 	icmplen = min(mblen, icmplen);
    228 
    229 	/*
    230 	 * As we are not required to return everything we have,
    231 	 * we return whatever we can return at ease.
    232 	 *
    233 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    234 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    235 	 * icmp_sysctl will keep things below that limit.
    236 	 */
    237 
    238 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    239 
    240 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    241 		icmplen = MCLBYTES - ICMP_MINLEN;
    242 
    243 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    244 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    245 		MCLGET(m, M_DONTWAIT);
    246 		if ((m->m_flags & M_EXT) == 0) {
    247 			m_freem(m);
    248 			m = NULL;
    249 		}
    250 	}
    251 	if (m == NULL)
    252 		goto freeit;
    253 	m->m_len = icmplen + ICMP_MINLEN;
    254 	if ((m->m_flags & M_EXT) == 0)
    255 		MH_ALIGN(m, m->m_len);
    256 	icp = mtod(m, struct icmp *);
    257 	if ((u_int)type > ICMP_MAXTYPE)
    258 		panic("icmp_error");
    259 	icmpstat.icps_outhist[type]++;
    260 	icp->icmp_type = type;
    261 	if (type == ICMP_REDIRECT)
    262 		icp->icmp_gwaddr.s_addr = dest;
    263 	else {
    264 		icp->icmp_void = 0;
    265 		/*
    266 		 * The following assignments assume an overlay with the
    267 		 * zeroed icmp_void field.
    268 		 */
    269 		if (type == ICMP_PARAMPROB) {
    270 			icp->icmp_pptr = code;
    271 			code = 0;
    272 		} else if (type == ICMP_UNREACH &&
    273 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    274 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    275 	}
    276 
    277 	HTONS(oip->ip_off);
    278 	HTONS(oip->ip_len);
    279 	icp->icmp_code = code;
    280 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    281 	nip = &icp->icmp_ip;
    282 
    283 	/*
    284 	 * Now, copy old ip header (without options)
    285 	 * in front of icmp message.
    286 	 */
    287 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    288 		panic("icmp len");
    289 	m->m_data -= sizeof(struct ip);
    290 	m->m_len += sizeof(struct ip);
    291 	m->m_pkthdr.len = m->m_len;
    292 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    293 	nip = mtod(m, struct ip *);
    294 	/* ip_v set in ip_output */
    295 	nip->ip_hl = sizeof(struct ip) >> 2;
    296 	nip->ip_tos = 0;
    297 	nip->ip_len = m->m_len;
    298 	/* ip_id set in ip_output */
    299 	nip->ip_off = 0;
    300 	/* ip_ttl set in icmp_reflect */
    301 	nip->ip_p = IPPROTO_ICMP;
    302 	nip->ip_src = oip->ip_src;
    303 	nip->ip_dst = oip->ip_dst;
    304 	icmp_reflect(m);
    305 
    306 freeit:
    307 	m_freem(n);
    308 }
    309 
/*
 * Scratch socket addresses reused by the input and reflect paths.
 * The two initializers presumably fill in sin_len and sin_family --
 * confirm against the struct sockaddr_in layout.
 */
/* Destination of the packet quoted in an ICMP error/redirect. */
static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
/* Set in the mask-request, redirect and icmp_reflect() paths. */
static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
/* Sender of a received ICMP redirect. */
static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
/* Not referenced elsewhere in this file's visible code. */
struct sockaddr_in icmpmask = { 8, 0 };
    314 
    315 /*
    316  * Process a received ICMP message.
    317  */
    318 void
    319 #if __STDC__
    320 icmp_input(struct mbuf *m, ...)
    321 #else
    322 icmp_input(m, va_alist)
    323 	struct mbuf *m;
    324 	va_dcl
    325 #endif
    326 {
    327 	int proto;
    328 	struct icmp *icp;
    329 	struct ip *ip = mtod(m, struct ip *);
    330 	int icmplen;
    331 	int i;
    332 	struct in_ifaddr *ia;
    333 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    334 	int code;
    335 	int hlen;
    336 	va_list ap;
    337 
    338 	va_start(ap, m);
    339 	hlen = va_arg(ap, int);
    340 	proto = va_arg(ap, int);
    341 	va_end(ap);
    342 
    343 	/*
    344 	 * Locate icmp structure in mbuf, and check
    345 	 * that not corrupted and of at least minimum length.
    346 	 */
    347 	icmplen = ip->ip_len - hlen;
    348 #ifdef ICMPPRINTFS
    349 	if (icmpprintfs)
    350 		printf("icmp_input from %x to %x, len %d\n",
    351 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    352 		    icmplen);
    353 #endif
    354 	if (icmplen < ICMP_MINLEN) {
    355 		icmpstat.icps_tooshort++;
    356 		goto freeit;
    357 	}
    358 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    359 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    360 		icmpstat.icps_tooshort++;
    361 		return;
    362 	}
    363 	ip = mtod(m, struct ip *);
    364 	m->m_len -= hlen;
    365 	m->m_data += hlen;
    366 	icp = mtod(m, struct icmp *);
    367 	if (in_cksum(m, icmplen)) {
    368 		icmpstat.icps_checksum++;
    369 		goto freeit;
    370 	}
    371 	m->m_len += hlen;
    372 	m->m_data -= hlen;
    373 
    374 #ifdef ICMPPRINTFS
    375 	/*
    376 	 * Message type specific processing.
    377 	 */
    378 	if (icmpprintfs)
    379 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    380 		    icp->icmp_code);
    381 #endif
    382 #ifdef IPSEC
    383 	/* drop it if it does not match the policy */
    384 	if (ipsec4_in_reject(m, NULL)) {
    385 		ipsecstat.in_polvio++;
    386 		goto freeit;
    387 	}
    388 #endif
    389 	if (icp->icmp_type > ICMP_MAXTYPE)
    390 		goto raw;
    391 	icmpstat.icps_inhist[icp->icmp_type]++;
    392 	code = icp->icmp_code;
    393 	switch (icp->icmp_type) {
    394 
    395 	case ICMP_UNREACH:
    396 		switch (code) {
    397 			case ICMP_UNREACH_NET:
    398 			case ICMP_UNREACH_HOST:
    399 			case ICMP_UNREACH_PROTOCOL:
    400 			case ICMP_UNREACH_PORT:
    401 			case ICMP_UNREACH_SRCFAIL:
    402 				code += PRC_UNREACH_NET;
    403 				break;
    404 
    405 			case ICMP_UNREACH_NEEDFRAG:
    406 				code = PRC_MSGSIZE;
    407 				break;
    408 
    409 			case ICMP_UNREACH_NET_UNKNOWN:
    410 			case ICMP_UNREACH_NET_PROHIB:
    411 			case ICMP_UNREACH_TOSNET:
    412 				code = PRC_UNREACH_NET;
    413 				break;
    414 
    415 			case ICMP_UNREACH_HOST_UNKNOWN:
    416 			case ICMP_UNREACH_ISOLATED:
    417 			case ICMP_UNREACH_HOST_PROHIB:
    418 			case ICMP_UNREACH_TOSHOST:
    419 				code = PRC_UNREACH_HOST;
    420 				break;
    421 
    422 			default:
    423 				goto badcode;
    424 		}
    425 		goto deliver;
    426 
    427 	case ICMP_TIMXCEED:
    428 		if (code > 1)
    429 			goto badcode;
    430 		code += PRC_TIMXCEED_INTRANS;
    431 		goto deliver;
    432 
    433 	case ICMP_PARAMPROB:
    434 		if (code > 1)
    435 			goto badcode;
    436 		code = PRC_PARAMPROB;
    437 		goto deliver;
    438 
    439 	case ICMP_SOURCEQUENCH:
    440 		if (code)
    441 			goto badcode;
    442 		code = PRC_QUENCH;
    443 		goto deliver;
    444 
    445 	deliver:
    446 		/*
    447 		 * Problem with datagram; advise higher level routines.
    448 		 */
    449 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    450 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    451 			icmpstat.icps_badlen++;
    452 			goto freeit;
    453 		}
    454 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    455 			goto badcode;
    456 		NTOHS(icp->icmp_ip.ip_len);
    457 #ifdef ICMPPRINTFS
    458 		if (icmpprintfs)
    459 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    460 #endif
    461 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    462 		if (code == PRC_MSGSIZE && ip_mtudisc)
    463 			icmp_mtudisc(icp);
    464 		/*
    465 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    466 		 * notification to TCP layer.
    467 		 */
    468 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    469 		if (ctlfunc)
    470 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
    471 		break;
    472 
    473 	badcode:
    474 		icmpstat.icps_badcode++;
    475 		break;
    476 
    477 	case ICMP_ECHO:
    478 		icp->icmp_type = ICMP_ECHOREPLY;
    479 		goto reflect;
    480 
    481 	case ICMP_TSTAMP:
    482 		if (icmplen < ICMP_TSLEN) {
    483 			icmpstat.icps_badlen++;
    484 			break;
    485 		}
    486 		icp->icmp_type = ICMP_TSTAMPREPLY;
    487 		icp->icmp_rtime = iptime();
    488 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    489 		goto reflect;
    490 
    491 	case ICMP_MASKREQ:
    492 		if (icmpmaskrepl == 0)
    493 			break;
    494 		/*
    495 		 * We are not able to respond with all ones broadcast
    496 		 * unless we receive it over a point-to-point interface.
    497 		 */
    498 		if (icmplen < ICMP_MASKLEN) {
    499 			icmpstat.icps_badlen++;
    500 			break;
    501 		}
    502 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    503 		    in_nullhost(ip->ip_dst))
    504 			icmpdst.sin_addr = ip->ip_src;
    505 		else
    506 			icmpdst.sin_addr = ip->ip_dst;
    507 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    508 		    m->m_pkthdr.rcvif));
    509 		if (ia == 0)
    510 			break;
    511 		icp->icmp_type = ICMP_MASKREPLY;
    512 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    513 		if (in_nullhost(ip->ip_src)) {
    514 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    515 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    516 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    517 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    518 		}
    519 reflect:
    520 		icmpstat.icps_reflect++;
    521 		icmpstat.icps_outhist[icp->icmp_type]++;
    522 		icmp_reflect(m);
    523 		return;
    524 
    525 	case ICMP_REDIRECT:
    526 		if (code > 3)
    527 			goto badcode;
    528 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    529 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    530 			icmpstat.icps_badlen++;
    531 			break;
    532 		}
    533 		/*
    534 		 * Short circuit routing redirects to force
    535 		 * immediate change in the kernel's routing
    536 		 * tables.  The message is also handed to anyone
    537 		 * listening on a raw socket (e.g. the routing
    538 		 * daemon for use in updating its tables).
    539 		 */
    540 		icmpgw.sin_addr = ip->ip_src;
    541 		icmpdst.sin_addr = icp->icmp_gwaddr;
    542 #ifdef	ICMPPRINTFS
    543 		if (icmpprintfs)
    544 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    545 			    icp->icmp_gwaddr);
    546 #endif
    547 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    548 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    549 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    550 		    sintosa(&icmpgw), (struct rtentry **)0);
    551 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    552 #ifdef IPSEC
    553 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    554 #endif
    555 		break;
    556 
    557 	/*
    558 	 * No kernel processing for the following;
    559 	 * just fall through to send to raw listener.
    560 	 */
    561 	case ICMP_ECHOREPLY:
    562 	case ICMP_ROUTERADVERT:
    563 	case ICMP_ROUTERSOLICIT:
    564 	case ICMP_TSTAMPREPLY:
    565 	case ICMP_IREQREPLY:
    566 	case ICMP_MASKREPLY:
    567 	default:
    568 		break;
    569 	}
    570 
    571 raw:
    572 	rip_input(m, hlen, proto);
    573 	return;
    574 
    575 freeit:
    576 	m_freem(m);
    577 	return;
    578 }
    579 
/*
 * Reflect the ip packet back to the source
 *
 * The packet in m is turned around in place: the old source becomes the
 * destination, a local address is chosen as the new source, the TTL is
 * reset to MAXTTL, and any IP options are removed from the header.  The
 * options to send back (a source route from ip_srcroute(), plus copies
 * of record-route, timestamp and security options) are collected in a
 * separate mbuf that is passed to icmp_send() and freed here afterwards.
 * If the source address is unusable, or no local address can be found,
 * the packet is freed and nothing is sent.
 */
void
icmp_reflect(m)
	struct mbuf *m;
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct ifaddr *ifa;
	struct sockaddr_in *sin = 0;	/* chosen source address, if any */
	struct in_addr t;
	struct mbuf *opts = 0;		/* options for the reply, if any */
	/* bytes of IP options present in the incoming header */
	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);

	/* Loopback-net sources are exempt from the in_canforward() test. */
	if (!in_canforward(ip->ip_src) &&
	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
		m_freem(m);	/* Bad return address */
		goto done;	/* ip_output() will check for broadcast */
	}
	/* t remembers the original destination while the addresses swap. */
	t = ip->ip_dst;
	ip->ip_dst = ip->ip_src;
	/*
	 * If the incoming packet was addressed directly to us, use
	 * dst as the src for the reply.  Otherwise (broadcast or
	 * anonymous), use an address which corresponds to the
	 * incoming interface, with a preference for the address which
	 * corresponds to the route to the destination of the ICMP.
	 */

	/* Look for packet addressed to us */
	INADDR_TO_IA(t, ia);

	/* look for packet sent to broadcast address */
	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
				ia = ifatoia(ifa);
				break;
			}
		}
	}

	if (ia)
		sin = &ia->ia_addr;

	icmpdst.sin_addr = t;

	/* if the packet is addressed somewhere else, compute the
	   source address for packets routed back to the source, and
	   use that, if it's an address on the interface which
	   received the packet */
	if (sin == (struct sockaddr_in *)0) {
		struct sockaddr_in sin_dst;
		struct route icmproute;
		int errornum;

		sin_dst.sin_family = AF_INET;
		sin_dst.sin_len = sizeof(struct sockaddr_in);
		sin_dst.sin_addr = ip->ip_dst;
		bzero(&icmproute, sizeof(icmproute));
		errornum = 0;
		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
		/* errornum is never used */
		/* release any route left behind in the scratch struct */
		if (icmproute.ro_rt)
			RTFREE(icmproute.ro_rt);
		/* check to make sure sin is a source address on rcvif */
		if (sin) {
			t = sin->sin_addr;
			/* accepted again only if found on rcvif below */
			sin = (struct sockaddr_in *)0;
			INADDR_TO_IA(t, ia);
			while (ia) {
				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
					sin = &ia->ia_addr;
					break;
				}
				NEXT_IA_WITH_SAME_ADDR(ia);
			}
		}
	}

	/* if it was not addressed to us, but the route doesn't go out
	   the source interface, pick an address on the source
	   interface.  This can happen when routing is asymmetric, or
	   when the incoming packet was encapsulated */
	if (sin == (struct sockaddr_in *)0) {
		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			sin = &(ifatoia(ifa)->ia_addr);
			break;
		}
	}

	/*
	 * The following happens if the packet was not addressed to us,
	 * and was received on an interface with no IP address:
	 * We find the first AF_INET address on the first non-loopback
	 * interface.
	 */
	if (sin == (struct sockaddr_in *)0)
		for (ia = in_ifaddr.tqh_first; ia != NULL;
		    ia = ia->ia_list.tqe_next) {
			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
				continue;
			sin = &ia->ia_addr;
			break;
		}

	/*
	 * If we still didn't find an address, punt.  We could have an
	 * interface up (and receiving packets) with no address.
	 */
	if (sin == (struct sockaddr_in *)0) {
		m_freem(m);
		goto done;
	}

	ip->ip_src = sin->sin_addr;
	ip->ip_ttl = MAXTTL;

	if (optlen > 0) {
		u_char *cp;
		int opt, cnt;
		u_int len;

		/*
		 * Retrieve any source routing from the incoming packet;
		 * add on any record-route or timestamp options.
		 */
		cp = (u_char *) (ip + 1);
		/*
		 * With no saved source route, start the option mbuf with
		 * a single zeroed in_addr instead.
		 */
		if ((opts = ip_srcroute()) == 0 &&
		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
			opts->m_len = sizeof(struct in_addr);
			*mtod(opts, struct in_addr *) = zeroin_addr;
		}
		if (opts) {
#ifdef ICMPPRINTFS
		    if (icmpprintfs)
			    printf("icmp_reflect optlen %d rt %d => ",
				optlen, opts->m_len);
#endif
		    /* Walk the incoming options; stop at EOL or a bad length. */
		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
			    opt = cp[IPOPT_OPTVAL];
			    if (opt == IPOPT_EOL)
				    break;
			    if (opt == IPOPT_NOP)
				    len = 1;
			    else {
				    if (cnt < IPOPT_OLEN + sizeof(*cp))
					    break;
				    len = cp[IPOPT_OLEN];
				    if (len < IPOPT_OLEN + sizeof(*cp) ||
				        len > cnt)
					    break;
			    }
			    /*
			     * Should check for overflow, but it "can't happen"
			     */
			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
				opt == IPOPT_SECURITY) {
				    bcopy((caddr_t)cp,
					mtod(opts, caddr_t) + opts->m_len, len);
				    opts->m_len += len;
			    }
		    }
		    /* Terminate & pad, if necessary */
		    if ((cnt = opts->m_len % 4) != 0) {
			    for (; cnt < 4; cnt++) {
				    *(mtod(opts, caddr_t) + opts->m_len) =
					IPOPT_EOL;
				    opts->m_len++;
			    }
		    }
#ifdef ICMPPRINTFS
		    if (icmpprintfs)
			    printf("%d\n", opts->m_len);
#endif
		}
		/*
		 * Now strip out original options by copying rest of first
		 * mbuf's data back, and adjust the IP length.
		 */
		ip->ip_len -= optlen;
		ip->ip_hl = sizeof(struct ip) >> 2;
		m->m_len -= optlen;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= optlen;
		/* from here on optlen is the full length of the old header */
		optlen += sizeof(struct ip);
		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
			 (unsigned)(m->m_len - sizeof(struct ip)));
	}
	/* the reply is unicast even if the request was not */
	m->m_flags &= ~(M_BCAST|M_MCAST);
	icmp_send(m, opts);
done:
	if (opts)
		(void)m_free(opts);
}
    783 
    784 /*
    785  * Send an icmp packet back to the ip level,
    786  * after supplying a checksum.
    787  */
    788 void
    789 icmp_send(m, opts)
    790 	struct mbuf *m;
    791 	struct mbuf *opts;
    792 {
    793 	struct ip *ip = mtod(m, struct ip *);
    794 	int hlen;
    795 	struct icmp *icp;
    796 
    797 	hlen = ip->ip_hl << 2;
    798 	m->m_data += hlen;
    799 	m->m_len -= hlen;
    800 	icp = mtod(m, struct icmp *);
    801 	icp->icmp_cksum = 0;
    802 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    803 	m->m_data -= hlen;
    804 	m->m_len += hlen;
    805 #ifdef ICMPPRINTFS
    806 	if (icmpprintfs)
    807 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    808 #endif
    809 #ifdef IPSEC
    810 	/* Don't lookup socket */
    811 	ipsec_setsocket(m, NULL);
    812 #endif
    813 	(void) ip_output(m, opts, NULL, 0, NULL);
    814 }
    815 
    816 n_time
    817 iptime()
    818 {
    819 	struct timeval atv;
    820 	u_long t;
    821 
    822 	microtime(&atv);
    823 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    824 	return (htonl(t));
    825 }
    826 
    827 int
    828 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    829 	int *name;
    830 	u_int namelen;
    831 	void *oldp;
    832 	size_t *oldlenp;
    833 	void *newp;
    834 	size_t newlen;
    835 {
    836 	int arg, error;
    837 
    838 	/* All sysctl names at this level are terminal. */
    839 	if (namelen != 1)
    840 		return (ENOTDIR);
    841 
    842 	switch (name[0])
    843 	{
    844 	case ICMPCTL_MASKREPL:
    845 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    846 		break;
    847 	case ICMPCTL_RETURNDATABYTES:
    848 		arg = icmpreturndatabytes;
    849 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    850 		if (error)
    851 			break;
    852 		if ((arg >= 8) || (arg <= 512))
    853 			icmpreturndatabytes = arg;
    854 		else
    855 			error = EINVAL;
    856 		break;
    857 	case ICMPCTL_ERRPPSLIMIT:
    858 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    859 		break;
    860 	default:
    861 		error = ENOPROTOOPT;
    862 		break;
    863 	}
    864 	return error;
    865 }
    866 
    867 static void
    868 icmp_mtudisc(icp)
    869 	struct icmp *icp;
    870 {
    871 	struct rtentry *rt;
    872 	struct sockaddr *dst = sintosa(&icmpsrc);
    873 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    874 	int    error;
    875 
    876 	/* Table of common MTUs: */
    877 
    878 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    879 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    880 
    881 	rt = rtalloc1(dst, 1);
    882 	if (rt == 0)
    883 		return;
    884 
    885 	/* If we didn't get a host route, allocate one */
    886 
    887 	if ((rt->rt_flags & RTF_HOST) == 0) {
    888 		struct rtentry *nrt;
    889 
    890 		error = rtrequest((int) RTM_ADD, dst,
    891 		    (struct sockaddr *) rt->rt_gateway,
    892 		    (struct sockaddr *) 0,
    893 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    894 		if (error) {
    895 			rtfree(rt);
    896 			rtfree(nrt);
    897 			return;
    898 		}
    899 		nrt->rt_rmx = rt->rt_rmx;
    900 		rtfree(rt);
    901 		rt = nrt;
    902 	}
    903 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    904 	if (error) {
    905 		rtfree(rt);
    906 		return;
    907 	}
    908 
    909 	if (mtu == 0) {
    910 		int i = 0;
    911 
    912 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    913 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    914 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    915 			mtu -= (icp->icmp_ip.ip_hl << 2);
    916 
    917 		/* If we still can't guess a value, try the route */
    918 
    919 		if (mtu == 0) {
    920 			mtu = rt->rt_rmx.rmx_mtu;
    921 
    922 			/* If no route mtu, default to the interface mtu */
    923 
    924 			if (mtu == 0)
    925 				mtu = rt->rt_ifp->if_mtu;
    926 		}
    927 
    928 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    929 			if (mtu > mtu_table[i]) {
    930 				mtu = mtu_table[i];
    931 				break;
    932 			}
    933 	}
    934 
    935 	/*
    936 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    937 	 *	  to turn off PMTU for a route, and the kernel can
    938 	 *	  set it to indicate a serious problem with PMTU
    939 	 *	  on a route.  We should be using a separate flag
    940 	 *	  for the kernel to indicate this.
    941 	 */
    942 
    943 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    944 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    945 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    946 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    947 			 rt->rt_rmx.rmx_mtu == 0)
    948 			rt->rt_rmx.rmx_mtu = mtu;
    949 	}
    950 
    951 	if (rt)
    952 		rtfree(rt);
    953 }
    954 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction, and
 * also when MTU is negative.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Plateaus in descending order; the final 0 is a sentinel. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nent = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/*
	 * A negative MTU matches no entry, not even the sentinel, so the
	 * search below would leave i one past the end of the table.
	 */
	if (mtu < 0)
		return 0;

	/* Find the first plateau that does not exceed mtu. */
	for (i = 0; i < nent; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Larger plateau: none exists above the first entry. */
		if (i == 0) {
			return 0;
		} else {
			return mtutab[i - 1];
		}
	} else {
		if (mtutab[i] == 0) {
			/* Already at or below the smallest plateau. */
			return 0;
		} else if(mtu > mtutab[i]) {
			/* mtu lies between plateaus: round down. */
			return mtutab[i];
		} else {
			/* mtu is exactly a plateau: take the next one down. */
			return mtutab[i + 1];
		}
	}
}
    992 
    993 static void
    994 icmp_mtudisc_timeout(rt, r)
    995 	struct rtentry *rt;
    996 	struct rttimer *r;
    997 {
    998 	if (rt == NULL)
    999 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1000 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1001 	    (RTF_DYNAMIC | RTF_HOST)) {
   1002 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1003 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1004 	} else {
   1005 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1006 			rt->rt_rmx.rmx_mtu = 0;
   1007 		}
   1008 	}
   1009 }
   1010 
   1011 /*
   1012  * Perform rate limit check.
   1013  * Returns 0 if it is okay to send the icmp packet.
   1014  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1015  * limitation.
   1016  *
   1017  * XXX per-destination/type check necessary?
   1018  */
   1019 static int
   1020 icmp_ratelimit(dst, type, code)
   1021 	const struct in_addr *dst;
   1022 	const int type;			/* not used at this moment */
   1023 	const int code;			/* not used at this moment */
   1024 {
   1025 
   1026 	/* PPS limit */
   1027 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1028 	    icmperrppslim)) {
   1029 		/* The packet is subject to rate limit */
   1030 		return 1;
   1031 	}
   1032 
   1033 	/*okay to send*/
   1034 	return 0;
   1035 }
   1036