Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.52
      1 /*	$NetBSD: ip_icmp.c,v 1.52 2000/07/24 03:32:31 sommerfeld Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <uvm/uvm_extern.h>
    117 
    118 #include <sys/sysctl.h>
    119 
    120 #include <net/if.h>
    121 #include <net/route.h>
    122 
    123 #include <netinet/in.h>
    124 #include <netinet/in_systm.h>
    125 #include <netinet/in_var.h>
    126 #include <netinet/ip.h>
    127 #include <netinet/ip_icmp.h>
    128 #include <netinet/ip_var.h>
    129 #include <netinet/in_pcb.h>
    130 #include <netinet/icmp_var.h>
    131 
    132 #ifdef IPSEC
    133 #include <netinet6/ipsec.h>
    134 #include <netkey/key.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turn packets around and send them back to the originator,
    142  * and host table maintenance routines.
    143  */
    144 
        /*
         * Nonzero makes icmp_input() answer ICMP_MASKREQ; when zero such
         * requests get no kernel reply.  Settable via ICMPCTL_MASKREPL.
         */
    145 int	icmpmaskrepl = 0;
    146 #ifdef ICMPPRINTFS
        /* Nonzero enables the debugging printf()s guarded by ICMPPRINTFS. */
    147 int	icmpprintfs = 0;
    148 #endif
        /*
         * Upper bound on how many bytes following the offending packet's IP
         * header icmp_error() quotes in an error message.  Settable via
         * ICMPCTL_RETURNDATABYTES.
         */
    149 int	icmpreturndatabytes = 8;
    150 
        /*
         * ip_next_mtu() is deliberately declared with external linkage; the
         * static declaration is compiled out.  Its definition is not in the
         * part of the file shown here.
         */
    151 #if 0
    152 static int	ip_next_mtu __P((int, int));
    153 #else
    154 /*static*/ int	ip_next_mtu __P((int, int));
    155 #endif
    156 
        /*
         * Error rate limiting knobs, defined in another file.  icmp_sysctl()
         * exposes icmperrratelim as a microsecond count and icmperrppslim as
         * a plain int.
         */
    157 extern	struct timeval icmperrratelim;
    158 extern int icmperrppslim;
        /*
         * NOTE(review): the two variables below are not referenced by the
         * functions visible in this part of the file; presumably they are
         * bookkeeping for icmp_ratelimit() -- confirm against its definition.
         */
    159 static int icmperrpps_count = 0;
    160 static struct timeval icmperrppslim_last;
    161 
        /* Path MTU discovery helpers, local to this file. */
    162 static void icmp_mtudisc __P((struct icmp *));
    163 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    164 
        /*
         * Called by icmp_error() with the offending packet's source address
         * and the ICMP type and code; a nonzero return suppresses the error.
         */
    165 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    166 
    167 /*
    168  * Generate an error packet of type error
    169  * in response to bad packet ip.
    170  */
    171 void
    172 icmp_error(n, type, code, dest, destifp)
    173 	struct mbuf *n;
    174 	int type, code;
    175 	n_long dest;
    176 	struct ifnet *destifp;
    177 {
    178 	struct ip *oip = mtod(n, struct ip *), *nip;
    179 	unsigned oiplen = oip->ip_hl << 2;
    180 	struct icmp *icp;
    181 	struct mbuf *m;
    182 	unsigned icmplen, mblen;
    183 
    184 #ifdef ICMPPRINTFS
    185 	if (icmpprintfs)
    186 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    187 #endif
    188 	if (type != ICMP_REDIRECT)
    189 		icmpstat.icps_error++;
    190 	/*
    191 	 * Don't send error if the original packet was encrypted.
    192 	 * Don't send error if not the first fragment of message.
    193 	 * Don't error if the old packet protocol was ICMP
    194 	 * error message, only known informational types.
    195 	 */
    196 	if (n->m_flags & M_DECRYPTED)
    197 		goto freeit;
    198 	if (oip->ip_off &~ (IP_MF|IP_DF))
    199 		goto freeit;
    200 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    201 	  n->m_len >= oiplen + ICMP_MINLEN &&
    202 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    203 		icmpstat.icps_oldicmp++;
    204 		goto freeit;
    205 	}
    206 	/* Don't send error in response to a multicast or broadcast packet */
    207 	if (n->m_flags & (M_BCAST|M_MCAST))
    208 		goto freeit;
    209 
    210 	/*
    211 	 * First, do a rate limitation check.
    212 	 */
    213 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    214 		/* XXX stat */
    215 		goto freeit;
    216 	}
    217 
    218 	/*
    219 	 * Now, formulate icmp message
    220 	 */
    221 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    222 	/*
    223 	 * Defend against mbuf chains shorter than oip->ip_len:
    224 	 */
    225 	mblen = 0;
    226 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    227 		mblen += m->m_len;
    228 	icmplen = min(mblen, icmplen);
    229 
    230 	/*
    231 	 * As we are not required to return everything we have,
    232 	 * we return whatever we can return at ease.
    233 	 *
    234 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    235 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    236 	 * icmp_sysctl will keep things below that limit.
    237 	 */
    238 
    239 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    240 
    241 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    242 		icmplen = MCLBYTES - ICMP_MINLEN;
    243 
    244 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    245 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    246 		MCLGET(m, M_DONTWAIT);
    247 		if ((m->m_flags & M_EXT) == 0) {
    248 			m_freem(m);
    249 			m = NULL;
    250 		}
    251 	}
    252 	if (m == NULL)
    253 		goto freeit;
    254 	m->m_len = icmplen + ICMP_MINLEN;
    255 	if ((m->m_flags & M_EXT) == 0)
    256 		MH_ALIGN(m, m->m_len);
    257 	icp = mtod(m, struct icmp *);
    258 	if ((u_int)type > ICMP_MAXTYPE)
    259 		panic("icmp_error");
    260 	icmpstat.icps_outhist[type]++;
    261 	icp->icmp_type = type;
    262 	if (type == ICMP_REDIRECT)
    263 		icp->icmp_gwaddr.s_addr = dest;
    264 	else {
    265 		icp->icmp_void = 0;
    266 		/*
    267 		 * The following assignments assume an overlay with the
    268 		 * zeroed icmp_void field.
    269 		 */
    270 		if (type == ICMP_PARAMPROB) {
    271 			icp->icmp_pptr = code;
    272 			code = 0;
    273 		} else if (type == ICMP_UNREACH &&
    274 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    275 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    276 	}
    277 
    278 	HTONS(oip->ip_off);
    279 	HTONS(oip->ip_len);
    280 	icp->icmp_code = code;
    281 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    282 	nip = &icp->icmp_ip;
    283 
    284 	/*
    285 	 * Now, copy old ip header (without options)
    286 	 * in front of icmp message.
    287 	 */
    288 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    289 		panic("icmp len");
    290 	m->m_data -= sizeof(struct ip);
    291 	m->m_len += sizeof(struct ip);
    292 	m->m_pkthdr.len = m->m_len;
    293 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    294 	nip = mtod(m, struct ip *);
    295 	/* ip_v set in ip_output */
    296 	nip->ip_hl = sizeof(struct ip) >> 2;
    297 	nip->ip_tos = 0;
    298 	nip->ip_len = m->m_len;
    299 	/* ip_id set in ip_output */
    300 	nip->ip_off = 0;
    301 	/* ip_ttl set in icmp_reflect */
    302 	nip->ip_p = IPPROTO_ICMP;
    303 	nip->ip_src = oip->ip_src;
    304 	nip->ip_dst = oip->ip_dst;
    305 	icmp_reflect(m);
    306 
    307 freeit:
    308 	m_freem(n);
    309 }
    310 
        /*
         * Scratch socket addresses shared by the routines in this file; only
         * the sin_addr member is rewritten at run time.
         *
         *	icmpsrc	 destination of the IP header quoted inside a received
         *		 error or redirect; passed to the pr_ctlinput hooks,
         *		 rtredirect() and pfctlinput(), and read by
         *		 icmp_mtudisc().
         *	icmpdst	 address used for the interface lookup when answering
         *		 a mask request, new gateway of a redirect, and the
         *		 original destination saved by icmp_reflect().
         *	icmpgw	 source of a received redirect.
         *
         * NOTE(review): icmpmask is not referenced by the code visible in
         * this part of the file.  Its positional initializers presumably set
         * sin_len to 8 and sin_family to 0 -- confirm against the struct
         * sockaddr_in layout.
         */
    311 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    312 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    313 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    314 struct sockaddr_in icmpmask = { 8, 0 };
    315 
    316 /*
    317  * Process a received ICMP message.
         *
         * Besides the mbuf chain m, two int arguments are read from the
         * variable argument list: the IP header length (hlen) and the
         * protocol number (proto).
         *
         * The message is length-checked and checksummed, then dispatched on
         * its ICMP type:
         *  - error reports (unreachable, time exceeded, parameter problem,
         *    source quench) are translated to a PRC_* code and passed to the
         *    pr_ctlinput hook of the protocol named in the quoted IP header;
         *  - echo, timestamp and (when icmpmaskrepl is set) address mask
         *    requests are rewritten in place into replies and sent back
         *    with icmp_reflect();
         *  - redirects are applied with rtredirect().
         * Unless the message was reflected or dropped via the freeit label,
         * it is finally handed to rip_input().
    318  */
    319 void
    320 #if __STDC__
    321 icmp_input(struct mbuf *m, ...)
    322 #else
    323 icmp_input(m, va_alist)
    324 	struct mbuf *m;
    325 	va_dcl
    326 #endif
    327 {
    328 	int proto;
    329 	struct icmp *icp;
    330 	struct ip *ip = mtod(m, struct ip *);
    331 	int icmplen;
    332 	int i;
    333 	struct in_ifaddr *ia;
    334 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    335 	int code;
    336 	int hlen;
    337 	va_list ap;
    338 
        	/* Fetch the IP header length and protocol passed by the caller. */
    339 	va_start(ap, m);
    340 	hlen = va_arg(ap, int);
    341 	proto = va_arg(ap, int);
    342 	va_end(ap);
    343 
    344 	/*
    345 	 * Locate icmp structure in mbuf, and check
    346 	 * that not corrupted and of at least minimum length.
    347 	 */
    348 	icmplen = ip->ip_len - hlen;
    349 #ifdef ICMPPRINTFS
    350 	if (icmpprintfs)
    351 		printf("icmp_input from %x to %x, len %d\n",
    352 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    353 		    icmplen);
    354 #endif
    355 	if (icmplen < ICMP_MINLEN) {
    356 		icmpstat.icps_tooshort++;
    357 		goto freeit;
    358 	}
        	/*
        	 * Make the IP header plus up to ICMP_ADVLENMIN bytes of ICMP
        	 * contiguous in the first mbuf.  On failure m has been replaced
        	 * by the NULL result of m_pullup(), so there is nothing to free
        	 * here (m_pullup() presumably released the chain -- confirm).
        	 */
    359 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    360 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    361 		icmpstat.icps_tooshort++;
    362 		return;
    363 	}
        	/* m_pullup() may have returned a different mbuf; reload ip. */
    364 	ip = mtod(m, struct ip *);
        	/*
        	 * Temporarily step over the IP header so that the checksum is
        	 * computed over the ICMP message only, then step back.
        	 */
    365 	m->m_len -= hlen;
    366 	m->m_data += hlen;
    367 	icp = mtod(m, struct icmp *);
    368 	if (in_cksum(m, icmplen)) {
    369 		icmpstat.icps_checksum++;
    370 		goto freeit;
    371 	}
    372 	m->m_len += hlen;
    373 	m->m_data -= hlen;
    374 
    375 #ifdef ICMPPRINTFS
    376 	/*
    377 	 * Message type specific processing.
    378 	 */
    379 	if (icmpprintfs)
    380 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    381 		    icp->icmp_code);
    382 #endif
    383 #ifdef IPSEC
    384 	/* drop it if it does not match the policy */
    385 	if (ipsec4_in_reject(m, NULL)) {
    386 		ipsecstat.in_polvio++;
    387 		goto freeit;
    388 	}
    389 #endif
        	/*
        	 * Types beyond ICMP_MAXTYPE get no kernel processing and are not
        	 * counted in icps_inhist; they go straight to the raw listener.
        	 */
    390 	if (icp->icmp_type > ICMP_MAXTYPE)
    391 		goto raw;
    392 	icmpstat.icps_inhist[icp->icmp_type]++;
    393 	code = icp->icmp_code;
    394 	switch (icp->icmp_type) {
    395 
        	/*
        	 * The four error types below only translate the ICMP code into a
        	 * PRC_* value and then share the "deliver" label.
        	 */
    396 	case ICMP_UNREACH:
    397 		switch (code) {
        			/*
        			 * Adding PRC_UNREACH_NET assumes the matching PRC_*
        			 * constants are numbered in the same order as these
        			 * ICMP codes -- defined elsewhere, confirm.
        			 */
    398 			case ICMP_UNREACH_NET:
    399 			case ICMP_UNREACH_HOST:
    400 			case ICMP_UNREACH_PROTOCOL:
    401 			case ICMP_UNREACH_PORT:
    402 			case ICMP_UNREACH_SRCFAIL:
    403 				code += PRC_UNREACH_NET;
    404 				break;
    405 
    406 			case ICMP_UNREACH_NEEDFRAG:
    407 				code = PRC_MSGSIZE;
    408 				break;
    409 
    410 			case ICMP_UNREACH_NET_UNKNOWN:
    411 			case ICMP_UNREACH_NET_PROHIB:
    412 			case ICMP_UNREACH_TOSNET:
    413 				code = PRC_UNREACH_NET;
    414 				break;
    415 
    416 			case ICMP_UNREACH_HOST_UNKNOWN:
    417 			case ICMP_UNREACH_ISOLATED:
    418 			case ICMP_UNREACH_HOST_PROHIB:
    419 			case ICMP_UNREACH_TOSHOST:
    420 				code = PRC_UNREACH_HOST;
    421 				break;
    422 
    423 			default:
    424 				goto badcode;
    425 		}
    426 		goto deliver;
    427 
    428 	case ICMP_TIMXCEED:
    429 		if (code > 1)
    430 			goto badcode;
    431 		code += PRC_TIMXCEED_INTRANS;
    432 		goto deliver;
    433 
    434 	case ICMP_PARAMPROB:
    435 		if (code > 1)
    436 			goto badcode;
    437 		code = PRC_PARAMPROB;
    438 		goto deliver;
    439 
    440 	case ICMP_SOURCEQUENCH:
    441 		if (code)
    442 			goto badcode;
    443 		code = PRC_QUENCH;
    444 		goto deliver;
    445 
    446 	deliver:
    447 		/*
    448 		 * Problem with datagram; advise higher level routines.
        		 * The message must be long enough to hold the quoted IP
        		 * header, and that header must claim at least the minimum
        		 * header length; otherwise the packet is dropped.
    449 		 */
    450 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    451 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    452 			icmpstat.icps_badlen++;
    453 			goto freeit;
    454 		}
        		/* An error quoting a packet sent to a multicast address is
        		   counted as a bad code and not delivered. */
    455 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    456 			goto badcode;
        		/* icmp_mtudisc() reads the quoted ip_len in host order. */
    457 		NTOHS(icp->icmp_ip.ip_len);
    458 #ifdef ICMPPRINTFS
    459 		if (icmpprintfs)
    460 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    461 #endif
    462 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    463 		if (code == PRC_MSGSIZE && ip_mtudisc)
    464 			icmp_mtudisc(icp);
    465 		/*
    466 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    467 		 * notification to TCP layer.
    468 		 */
    469 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    470 		if (ctlfunc)
    471 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
        		/* Leaving the switch passes the message on to rip_input(). */
    472 		break;
    473 
        	/* Unexpected code: count it; the message still goes to rip_input(). */
    474 	badcode:
    475 		icmpstat.icps_badcode++;
    476 		break;
    477 
        	/* Requests are turned into replies in place and reflected. */
    478 	case ICMP_ECHO:
    479 		icp->icmp_type = ICMP_ECHOREPLY;
    480 		goto reflect;
    481 
    482 	case ICMP_TSTAMP:
    483 		if (icmplen < ICMP_TSLEN) {
    484 			icmpstat.icps_badlen++;
    485 			break;
    486 		}
    487 		icp->icmp_type = ICMP_TSTAMPREPLY;
    488 		icp->icmp_rtime = iptime();
    489 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    490 		goto reflect;
    491 
    492 	case ICMP_MASKREQ:
        		/* Mask replies are only generated when enabled. */
    493 		if (icmpmaskrepl == 0)
    494 			break;
    495 		/*
    496 		 * We are not able to respond with all ones broadcast
    497 		 * unless we receive it over a point-to-point interface.
    498 		 */
    499 		if (icmplen < ICMP_MASKLEN) {
    500 			icmpstat.icps_badlen++;
    501 			break;
    502 		}
        		/*
        		 * Pick the address used to find our interface address: the
        		 * sender's when the request went to the all-ones broadcast or
        		 * null address, otherwise the destination address itself.
        		 */
    503 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    504 		    in_nullhost(ip->ip_dst))
    505 			icmpdst.sin_addr = ip->ip_src;
    506 		else
    507 			icmpdst.sin_addr = ip->ip_dst;
    508 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    509 		    m->m_pkthdr.rcvif));
    510 		if (ia == 0)
    511 			break;
    512 		icp->icmp_type = ICMP_MASKREPLY;
    513 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
        		/*
        		 * A request from the null address has no usable return
        		 * address; substitute the interface's broadcast or
        		 * point-to-point peer address so the reply can be reflected.
        		 */
    514 		if (in_nullhost(ip->ip_src)) {
    515 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    516 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    517 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    518 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    519 		}
        /* Common tail for echo, timestamp and mask replies; m is handed to
           icmp_reflect() and must not be touched afterwards. */
    520 reflect:
    521 		icmpstat.icps_reflect++;
    522 		icmpstat.icps_outhist[icp->icmp_type]++;
    523 		icmp_reflect(m);
    524 		return;
    525 
    526 	case ICMP_REDIRECT:
    527 		if (code > 3)
    528 			goto badcode;
    529 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    530 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    531 			icmpstat.icps_badlen++;
    532 			break;
    533 		}
    534 		/*
    535 		 * Short circuit routing redirects to force
    536 		 * immediate change in the kernel's routing
    537 		 * tables.  The message is also handed to anyone
    538 		 * listening on a raw socket (e.g. the routing
    539 		 * daemon for use in updating its tables).
    540 		 */
        		/* icmpgw: who sent the redirect; icmpdst: the new gateway;
        		   icmpsrc (below): the destination being redirected. */
    541 		icmpgw.sin_addr = ip->ip_src;
    542 		icmpdst.sin_addr = icp->icmp_gwaddr;
    543 #ifdef	ICMPPRINTFS
    544 		if (icmpprintfs)
    545 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    546 			    icp->icmp_gwaddr);
    547 #endif
    548 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    549 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    550 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    551 		    sintosa(&icmpgw), (struct rtentry **)0);
    552 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    553 #ifdef IPSEC
    554 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    555 #endif
    556 		break;
    557 
    558 	/*
    559 	 * No kernel processing for the following;
    560 	 * just fall through to send to raw listener.
    561 	 */
    562 	case ICMP_ECHOREPLY:
    563 	case ICMP_ROUTERADVERT:
    564 	case ICMP_ROUTERSOLICIT:
    565 	case ICMP_TSTAMPREPLY:
    566 	case ICMP_IREQREPLY:
    567 	case ICMP_MASKREPLY:
    568 	default:
    569 		break;
    570 	}
    571 
        /* Hand the message, IP header included, to the raw IP input routine. */
    572 raw:
    573 	rip_input(m, hlen, proto);
    574 	return;
    575 
        /* Drop path for malformed or rejected messages. */
    576 freeit:
    577 	m_freem(m);
    578 	return;
    579 }
    580 
    581 /*
    582  * Reflect the ip packet back to the source
    583  */
    584 void
    585 icmp_reflect(m)
    586 	struct mbuf *m;
    587 {
    588 	struct ip *ip = mtod(m, struct ip *);
    589 	struct in_ifaddr *ia;
    590 	struct ifaddr *ifa;
    591 	struct sockaddr_in *sin = 0;
    592 	struct in_addr t;
    593 	struct mbuf *opts = 0;
    594 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    595 
    596 	if (!in_canforward(ip->ip_src) &&
    597 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    598 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    599 		m_freem(m);	/* Bad return address */
    600 		goto done;	/* ip_output() will check for broadcast */
    601 	}
    602 	t = ip->ip_dst;
    603 	ip->ip_dst = ip->ip_src;
    604 	/*
    605 	 * If the incoming packet was addressed directly to us, use
    606 	 * dst as the src for the reply.  Otherwise (broadcast or
    607 	 * anonymous), use an address which corresponds to the
    608 	 * incoming interface, with a preference for the address which
    609 	 * corresponds to the route to the destination of the ICMP.
    610 	 */
    611 
    612 	/* Look for packet addressed to us */
    613 	INADDR_TO_IA(t, ia);
    614 
    615 	/* look for packet sent to broadcast address */
    616 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    617 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    618 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    619 			if (ifa->ifa_addr->sa_family != AF_INET)
    620 				continue;
    621 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    622 				ia = ifatoia(ifa);
    623 				break;
    624 			}
    625 		}
    626 	}
    627 
    628 	if (ia)
    629 		sin = &ia->ia_addr;
    630 
    631 	icmpdst.sin_addr = t;
    632 
    633 	/* if the packet is addressed somewhere else, compute the
    634 	   source address for packets routed back to the source, and
    635 	   use that, if it's an address on the interface which
    636 	   received the packet */
    637 	if (sin == (struct sockaddr_in *)0) {
    638 		struct sockaddr_in sin_dst;
    639 		struct route icmproute;
    640 		int errornum;
    641 
    642 		sin_dst.sin_family = AF_INET;
    643 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    644 		sin_dst.sin_addr = ip->ip_dst;
    645 		bzero(&icmproute, sizeof(icmproute));
    646 		errornum = 0;
    647 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    648 		/* errornum is never used */
    649 		if (icmproute.ro_rt)
    650 			RTFREE(icmproute.ro_rt);
    651 		/* check to make sure sin is a source address on rcvif */
    652 		if (sin) {
    653 			t = sin->sin_addr;
    654 			sin = (struct sockaddr_in *)0;
    655 			INADDR_TO_IA(t, ia);
    656 			while (ia) {
    657 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    658 					sin = &ia->ia_addr;
    659 					break;
    660 				}
    661 				NEXT_IA_WITH_SAME_ADDR(ia);
    662 			}
    663 		}
    664 	}
    665 
    666 	/* if it was not addressed to us, but the route doesn't go out
    667 	   the source interface, pick an address on the source
    668 	   interface.  This can happen when routing is asymmetric, or
    669 	   when the incoming packet was encapsulated */
    670 	if (sin == (struct sockaddr_in *)0) {
    671 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    672 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    673 			if (ifa->ifa_addr->sa_family != AF_INET)
    674 				continue;
    675 			sin = &(ifatoia(ifa)->ia_addr);
    676 			break;
    677 		}
    678 	}
    679 
    680 	/*
    681 	 * The following happens if the packet was not addressed to us,
    682 	 * and was received on an interface with no IP address:
    683 	 * We find the first AF_INET address on the first non-loopback
    684 	 * interface.
    685 	 */
    686 	if (sin == (struct sockaddr_in *)0)
    687 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    688 		    ia = ia->ia_list.tqe_next) {
    689 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    690 				continue;
    691 			sin = &ia->ia_addr;
    692 			break;
    693 		}
    694 
    695 	/*
    696 	 * If we still didn't find an address, punt.  We could have an
    697 	 * interface up (and receiving packets) with no address.
    698 	 */
    699 	if (sin == (struct sockaddr_in *)0) {
    700 		m_freem(m);
    701 		goto done;
    702 	}
    703 
    704 	ip->ip_src = sin->sin_addr;
    705 	ip->ip_ttl = MAXTTL;
    706 
    707 	if (optlen > 0) {
    708 		u_char *cp;
    709 		int opt, cnt;
    710 		u_int len;
    711 
    712 		/*
    713 		 * Retrieve any source routing from the incoming packet;
    714 		 * add on any record-route or timestamp options.
    715 		 */
    716 		cp = (u_char *) (ip + 1);
    717 		if ((opts = ip_srcroute()) == 0 &&
    718 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    719 			opts->m_len = sizeof(struct in_addr);
    720 			*mtod(opts, struct in_addr *) = zeroin_addr;
    721 		}
    722 		if (opts) {
    723 #ifdef ICMPPRINTFS
    724 		    if (icmpprintfs)
    725 			    printf("icmp_reflect optlen %d rt %d => ",
    726 				optlen, opts->m_len);
    727 #endif
    728 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    729 			    opt = cp[IPOPT_OPTVAL];
    730 			    if (opt == IPOPT_EOL)
    731 				    break;
    732 			    if (opt == IPOPT_NOP)
    733 				    len = 1;
    734 			    else {
    735 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    736 					    break;
    737 				    len = cp[IPOPT_OLEN];
    738 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    739 				        len > cnt)
    740 					    break;
    741 			    }
    742 			    /*
    743 			     * Should check for overflow, but it "can't happen"
    744 			     */
    745 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    746 				opt == IPOPT_SECURITY) {
    747 				    bcopy((caddr_t)cp,
    748 					mtod(opts, caddr_t) + opts->m_len, len);
    749 				    opts->m_len += len;
    750 			    }
    751 		    }
    752 		    /* Terminate & pad, if necessary */
    753 		    if ((cnt = opts->m_len % 4) != 0) {
    754 			    for (; cnt < 4; cnt++) {
    755 				    *(mtod(opts, caddr_t) + opts->m_len) =
    756 					IPOPT_EOL;
    757 				    opts->m_len++;
    758 			    }
    759 		    }
    760 #ifdef ICMPPRINTFS
    761 		    if (icmpprintfs)
    762 			    printf("%d\n", opts->m_len);
    763 #endif
    764 		}
    765 		/*
    766 		 * Now strip out original options by copying rest of first
    767 		 * mbuf's data back, and adjust the IP length.
    768 		 */
    769 		ip->ip_len -= optlen;
    770 		ip->ip_hl = sizeof(struct ip) >> 2;
    771 		m->m_len -= optlen;
    772 		if (m->m_flags & M_PKTHDR)
    773 			m->m_pkthdr.len -= optlen;
    774 		optlen += sizeof(struct ip);
    775 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    776 			 (unsigned)(m->m_len - sizeof(struct ip)));
    777 	}
    778 	m->m_flags &= ~(M_BCAST|M_MCAST);
    779 	icmp_send(m, opts);
    780 done:
    781 	if (opts)
    782 		(void)m_free(opts);
    783 }
    784 
    785 /*
    786  * Send an icmp packet back to the ip level,
    787  * after supplying a checksum.
    788  */
    789 void
    790 icmp_send(m, opts)
    791 	struct mbuf *m;
    792 	struct mbuf *opts;
    793 {
    794 	struct ip *ip = mtod(m, struct ip *);
    795 	int hlen;
    796 	struct icmp *icp;
    797 
    798 	hlen = ip->ip_hl << 2;
    799 	m->m_data += hlen;
    800 	m->m_len -= hlen;
    801 	icp = mtod(m, struct icmp *);
    802 	icp->icmp_cksum = 0;
    803 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    804 	m->m_data -= hlen;
    805 	m->m_len += hlen;
    806 #ifdef ICMPPRINTFS
    807 	if (icmpprintfs)
    808 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    809 #endif
    810 #ifdef IPSEC
    811 	/* Don't lookup socket */
    812 	ipsec_setsocket(m, NULL);
    813 #endif
    814 	(void) ip_output(m, opts, NULL, 0, NULL);
    815 }
    816 
    817 n_time
    818 iptime()
    819 {
    820 	struct timeval atv;
    821 	u_long t;
    822 
    823 	microtime(&atv);
    824 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    825 	return (htonl(t));
    826 }
    827 
    828 int
    829 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    830 	int *name;
    831 	u_int namelen;
    832 	void *oldp;
    833 	size_t *oldlenp;
    834 	void *newp;
    835 	size_t newlen;
    836 {
    837 	int arg, error, s;
    838 
    839 	/* All sysctl names at this level are terminal. */
    840 	if (namelen != 1)
    841 		return (ENOTDIR);
    842 
    843 	switch (name[0])
    844 	{
    845 	case ICMPCTL_MASKREPL:
    846 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    847 		break;
    848 	case ICMPCTL_ERRRATELIMIT:
    849 		/*
    850 		 * The sysctl specifies the rate in usec-between-icmp,
    851 		 * so we must convert from/to a timeval.
    852 		 */
    853 		arg = (icmperrratelim.tv_sec * 1000000) +
    854 		    icmperrratelim.tv_usec;
    855 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    856 		if (error)
    857 			break;
    858 		if (arg >= 0) {
    859 			s = splsoftnet();
    860 			icmperrratelim.tv_sec = arg / 1000000;
    861 			icmperrratelim.tv_usec = arg % 1000000;
    862 			splx(s);
    863 		} else
    864 			error = EINVAL;
    865 		break;
    866 	case ICMPCTL_RETURNDATABYTES:
    867 		arg = icmpreturndatabytes;
    868 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    869 		if (error)
    870 			break;
    871 		if ((arg >= 8) || (arg <= 512))
    872 			icmpreturndatabytes = arg;
    873 		else
    874 			error = EINVAL;
    875 		break;
    876 	case ICMPCTL_ERRPPSLIMIT:
    877 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    878 		break;
    879 	default:
    880 		error = ENOPROTOOPT;
    881 		break;
    882 	}
    883 	return error;
    884 }
    885 
    886 static void
    887 icmp_mtudisc(icp)
    888 	struct icmp *icp;
    889 {
    890 	struct rtentry *rt;
    891 	struct sockaddr *dst = sintosa(&icmpsrc);
    892 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    893 	int    error;
    894 
    895 	/* Table of common MTUs: */
    896 
    897 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    898 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    899 
    900 	rt = rtalloc1(dst, 1);
    901 	if (rt == 0)
    902 		return;
    903 
    904 	/* If we didn't get a host route, allocate one */
    905 
    906 	if ((rt->rt_flags & RTF_HOST) == 0) {
    907 		struct rtentry *nrt;
    908 
    909 		error = rtrequest((int) RTM_ADD, dst,
    910 		    (struct sockaddr *) rt->rt_gateway,
    911 		    (struct sockaddr *) 0,
    912 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    913 		if (error) {
    914 			rtfree(rt);
    915 			rtfree(nrt);
    916 			return;
    917 		}
    918 		nrt->rt_rmx = rt->rt_rmx;
    919 		rtfree(rt);
    920 		rt = nrt;
    921 	}
    922 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    923 	if (error) {
    924 		rtfree(rt);
    925 		return;
    926 	}
    927 
    928 	if (mtu == 0) {
    929 		int i = 0;
    930 
    931 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    932 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    933 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    934 			mtu -= (icp->icmp_ip.ip_hl << 2);
    935 
    936 		/* If we still can't guess a value, try the route */
    937 
    938 		if (mtu == 0) {
    939 			mtu = rt->rt_rmx.rmx_mtu;
    940 
    941 			/* If no route mtu, default to the interface mtu */
    942 
    943 			if (mtu == 0)
    944 				mtu = rt->rt_ifp->if_mtu;
    945 		}
    946 
    947 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    948 			if (mtu > mtu_table[i]) {
    949 				mtu = mtu_table[i];
    950 				break;
    951 			}
    952 	}
    953 
    954 	/*
    955 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    956 	 *	  to turn off PMTU for a route, and the kernel can
    957 	 *	  set it to indicate a serious problem with PMTU
    958 	 *	  on a route.  We should be using a separate flag
    959 	 *	  for the kernel to indicate this.
    960 	 */
    961 
    962 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    963 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    964 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    965 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    966 			 rt->rt_rmx.rmx_mtu == 0)
    967 			rt->rt_rmx.rmx_mtu = mtu;
    968 	}
    969 
    970 	if (rt)
    971 		rtfree(rt);
    972 }
    973 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * relative to the current value MTU.
 *
 * If DIR is less than zero, the next larger plateau is returned;
 * otherwise the next smaller one is.  For DIR >= 0 and an MTU that is
 * not itself a plateau, the plateau just below MTU is returned.
 *
 * 0 is returned when no such plateau exists: MTU is at or above the
 * largest entry (DIR < 0), MTU is at or below the smallest entry
 * (DIR >= 0), or MTU is negative.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Plateaus in descending order; the trailing 0 is a terminator. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const size_t nmtu = sizeof(mtutab) / sizeof(mtutab[0]);
	size_t i;

	/*
	 * A negative value never matches the 0 terminator, so the search
	 * below would run off the end of the table.
	 */
	if (mtu < 0)
		return 0;

	/* Find the first plateau that does not exceed mtu. */
	for (i = 0; i < nmtu; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Next larger plateau, if there is one above index i. */
		return (i == 0) ? 0 : mtutab[i - 1];
	}

	if (mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	/* mtu sits exactly on a non-terminator plateau; i + 1 is in range. */
	return mtutab[i + 1];
}
   1011 
   1012 static void
   1013 icmp_mtudisc_timeout(rt, r)
   1014 	struct rtentry *rt;
   1015 	struct rttimer *r;
   1016 {
   1017 	if (rt == NULL)
   1018 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1019 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1020 	    (RTF_DYNAMIC | RTF_HOST)) {
   1021 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1022 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1023 	} else {
   1024 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1025 			rt->rt_rmx.rmx_mtu = 0;
   1026 		}
   1027 	}
   1028 }
   1029 
   1030 /*
   1031  * Perform rate limit check.
   1032  * Returns 0 if it is okay to send the icmp packet.
   1033  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1034  * limitation.
   1035  *
   1036  * XXX per-destination/type check necessary?
   1037  */
   1038 static int
   1039 icmp_ratelimit(dst, type, code)
   1040 	const struct in_addr *dst;
   1041 	const int type;			/* not used at this moment */
   1042 	const int code;			/* not used at this moment */
   1043 {
   1044 	static struct timeval icmperrratelim_last;
   1045 	struct in_ifaddr *ia;
   1046 
   1047 	/*
   1048 	 * Don't rate-limit if it's for us!
   1049 	 */
   1050 	INADDR_TO_IA(*dst, ia);
   1051 	if (ia != NULL)
   1052 		return 0;
   1053 
   1054 	/* PPS limit */
   1055 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1056 	    icmperrppslim)) {
   1057 		/* The packet is subject to rate limit */
   1058 		return 1;
   1059 	}
   1060 
   1061 	/*
   1062 	 * ratecheck() returns true if it is okay to send.  We return
   1063 	 * true if it is not okay to send.
   1064 	 */
   1065 	if (!ratecheck(&icmperrratelim_last, &icmperrratelim)) {
   1066 		/* The packet is subject to rate limit */
   1067 		return 1;
   1068 	}
   1069 
   1070 	/*okay to send*/
   1071 	return 0;
   1072 }
   1073