Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.43
      1 /*	$NetBSD: ip_icmp.c,v 1.43 2000/03/01 12:49:32 itojun Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <vm/vm.h>
    117 #include <sys/sysctl.h>
    118 
    119 #include <net/if.h>
    120 #include <net/route.h>
    121 
    122 #include <netinet/in.h>
    123 #include <netinet/in_systm.h>
    124 #include <netinet/in_var.h>
    125 #include <netinet/ip.h>
    126 #include <netinet/ip_icmp.h>
    127 #include <netinet/ip_var.h>
    128 #include <netinet/in_pcb.h>
    129 #include <netinet/icmp_var.h>
    130 
    131 #ifdef IPSEC
    132 #include <netinet6/ipsec.h>
    133 #include <netkey/key.h>
    134 #include <netkey/key_debug.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
    139 /*
    140  * ICMP routines: error generation, receive packet processing, and
    141  * routines to turnaround packets back to the originator, and
    142  * host table maintenance routines.
    143  */
    144 
    145 int	icmpmaskrepl = 0;
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;
    148 #endif
    149 
    150 #if 0
    151 static int	ip_next_mtu __P((int, int));
    152 #else
    153 /*static*/ int	ip_next_mtu __P((int, int));
    154 #endif
    155 
    156 extern	struct timeval icmperrratelim;
    157 
    158 static void icmp_mtudisc __P((struct icmp *));
    159 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    160 
    161 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    162 
    163 /*
    164  * Generate an error packet of type error
    165  * in response to bad packet ip.
    166  */
    167 void
    168 icmp_error(n, type, code, dest, destifp)
    169 	struct mbuf *n;
    170 	int type, code;
    171 	n_long dest;
    172 	struct ifnet *destifp;
    173 {
    174 	register struct ip *oip = mtod(n, struct ip *), *nip;
    175 	register unsigned oiplen = oip->ip_hl << 2;
    176 	register struct icmp *icp;
    177 	register struct mbuf *m;
    178 	unsigned icmplen;
    179 
    180 #ifdef ICMPPRINTFS
    181 	if (icmpprintfs)
    182 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    183 #endif
    184 	if (type != ICMP_REDIRECT)
    185 		icmpstat.icps_error++;
    186 	/*
    187 	 * Don't send error if the original packet was encrypted.
    188 	 * Don't send error if not the first fragment of message.
    189 	 * Don't error if the old packet protocol was ICMP
    190 	 * error message, only known informational types.
    191 	 */
    192 	if (n->m_flags & M_DECRYPTED)
    193 		goto freeit;
    194 	if (oip->ip_off &~ (IP_MF|IP_DF))
    195 		goto freeit;
    196 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    197 	  n->m_len >= oiplen + ICMP_MINLEN &&
    198 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    199 		icmpstat.icps_oldicmp++;
    200 		goto freeit;
    201 	}
    202 	/* Don't send error in response to a multicast or broadcast packet */
    203 	if (n->m_flags & (M_BCAST|M_MCAST))
    204 		goto freeit;
    205 
    206 	/*
    207 	 * First, do a rate limitation check.
    208 	 */
    209 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    210 		/* XXX stat */
    211 		goto freeit;
    212 	}
    213 
    214 	/*
    215 	 * Now, formulate icmp message
    216 	 */
    217 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    218 	if (m == NULL)
    219 		goto freeit;
    220 	icmplen = oiplen + min(8, oip->ip_len - oiplen);
    221 	m->m_len = icmplen + ICMP_MINLEN;
    222 	MH_ALIGN(m, m->m_len);
    223 	icp = mtod(m, struct icmp *);
    224 	if ((u_int)type > ICMP_MAXTYPE)
    225 		panic("icmp_error");
    226 	icmpstat.icps_outhist[type]++;
    227 	icp->icmp_type = type;
    228 	if (type == ICMP_REDIRECT)
    229 		icp->icmp_gwaddr.s_addr = dest;
    230 	else {
    231 		icp->icmp_void = 0;
    232 		/*
    233 		 * The following assignments assume an overlay with the
    234 		 * zeroed icmp_void field.
    235 		 */
    236 		if (type == ICMP_PARAMPROB) {
    237 			icp->icmp_pptr = code;
    238 			code = 0;
    239 		} else if (type == ICMP_UNREACH &&
    240 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    241 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    242 	}
    243 
    244 	HTONS(oip->ip_off);
    245 	HTONS(oip->ip_len);
    246 	icp->icmp_code = code;
    247 	bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen);
    248 	nip = &icp->icmp_ip;
    249 
    250 	/*
    251 	 * Now, copy old ip header (without options)
    252 	 * in front of icmp message.
    253 	 */
    254 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    255 		panic("icmp len");
    256 	m->m_data -= sizeof(struct ip);
    257 	m->m_len += sizeof(struct ip);
    258 	m->m_pkthdr.len = m->m_len;
    259 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    260 	nip = mtod(m, struct ip *);
    261 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
    262 	nip->ip_len = m->m_len;
    263 	nip->ip_hl = sizeof(struct ip) >> 2;
    264 	nip->ip_p = IPPROTO_ICMP;
    265 	nip->ip_tos = 0;
    266 	icmp_reflect(m);
    267 
    268 freeit:
    269 	m_freem(n);
    270 }
    271 
    272 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    273 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    274 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    275 struct sockaddr_in icmpmask = { 8, 0 };
    276 
    277 /*
    278  * Process a received ICMP message.
    279  */
    280 void
    281 #if __STDC__
    282 icmp_input(struct mbuf *m, ...)
    283 #else
    284 icmp_input(m, va_alist)
    285 	struct mbuf *m;
    286 	va_dcl
    287 #endif
    288 {
    289 	int proto;
    290 	register struct icmp *icp;
    291 	register struct ip *ip = mtod(m, struct ip *);
    292 	int icmplen;
    293 	register int i;
    294 	struct in_ifaddr *ia;
    295 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    296 	int code;
    297 	int hlen;
    298 	va_list ap;
    299 
    300 	va_start(ap, m);
    301 	hlen = va_arg(ap, int);
    302 	proto = va_arg(ap, int);
    303 	va_end(ap);
    304 
    305 	/*
    306 	 * Locate icmp structure in mbuf, and check
    307 	 * that not corrupted and of at least minimum length.
    308 	 */
    309 	icmplen = ip->ip_len - hlen;
    310 #ifdef ICMPPRINTFS
    311 	if (icmpprintfs)
    312 		printf("icmp_input from %x to %x, len %d\n",
    313 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    314 		    icmplen);
    315 #endif
    316 	if (icmplen < ICMP_MINLEN) {
    317 		icmpstat.icps_tooshort++;
    318 		goto freeit;
    319 	}
    320 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    321 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    322 		icmpstat.icps_tooshort++;
    323 		return;
    324 	}
    325 	ip = mtod(m, struct ip *);
    326 	m->m_len -= hlen;
    327 	m->m_data += hlen;
    328 	icp = mtod(m, struct icmp *);
    329 	if (in_cksum(m, icmplen)) {
    330 		icmpstat.icps_checksum++;
    331 		goto freeit;
    332 	}
    333 	m->m_len += hlen;
    334 	m->m_data -= hlen;
    335 
    336 #ifdef ICMPPRINTFS
    337 	/*
    338 	 * Message type specific processing.
    339 	 */
    340 	if (icmpprintfs)
    341 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    342 		    icp->icmp_code);
    343 #endif
    344 #ifdef IPSEC
    345 	/* drop it if it does not match the policy */
    346 	if (ipsec4_in_reject(m, NULL)) {
    347 		ipsecstat.in_polvio++;
    348 		goto freeit;
    349 	}
    350 #endif
    351 	if (icp->icmp_type > ICMP_MAXTYPE)
    352 		goto raw;
    353 	icmpstat.icps_inhist[icp->icmp_type]++;
    354 	code = icp->icmp_code;
    355 	switch (icp->icmp_type) {
    356 
    357 	case ICMP_UNREACH:
    358 		switch (code) {
    359 			case ICMP_UNREACH_NET:
    360 			case ICMP_UNREACH_HOST:
    361 			case ICMP_UNREACH_PROTOCOL:
    362 			case ICMP_UNREACH_PORT:
    363 			case ICMP_UNREACH_SRCFAIL:
    364 				code += PRC_UNREACH_NET;
    365 				break;
    366 
    367 			case ICMP_UNREACH_NEEDFRAG:
    368 				code = PRC_MSGSIZE;
    369 				break;
    370 
    371 			case ICMP_UNREACH_NET_UNKNOWN:
    372 			case ICMP_UNREACH_NET_PROHIB:
    373 			case ICMP_UNREACH_TOSNET:
    374 				code = PRC_UNREACH_NET;
    375 				break;
    376 
    377 			case ICMP_UNREACH_HOST_UNKNOWN:
    378 			case ICMP_UNREACH_ISOLATED:
    379 			case ICMP_UNREACH_HOST_PROHIB:
    380 			case ICMP_UNREACH_TOSHOST:
    381 				code = PRC_UNREACH_HOST;
    382 				break;
    383 
    384 			default:
    385 				goto badcode;
    386 		}
    387 		goto deliver;
    388 
    389 	case ICMP_TIMXCEED:
    390 		if (code > 1)
    391 			goto badcode;
    392 		code += PRC_TIMXCEED_INTRANS;
    393 		goto deliver;
    394 
    395 	case ICMP_PARAMPROB:
    396 		if (code > 1)
    397 			goto badcode;
    398 		code = PRC_PARAMPROB;
    399 		goto deliver;
    400 
    401 	case ICMP_SOURCEQUENCH:
    402 		if (code)
    403 			goto badcode;
    404 		code = PRC_QUENCH;
    405 		goto deliver;
    406 
    407 	deliver:
    408 		/*
    409 		 * Problem with datagram; advise higher level routines.
    410 		 */
    411 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    412 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    413 			icmpstat.icps_badlen++;
    414 			goto freeit;
    415 		}
    416 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    417 			goto badcode;
    418 		NTOHS(icp->icmp_ip.ip_len);
    419 #ifdef ICMPPRINTFS
    420 		if (icmpprintfs)
    421 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    422 #endif
    423 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    424 		if (code == PRC_MSGSIZE && ip_mtudisc)
    425 			icmp_mtudisc(icp);
    426 		/*
    427 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    428 		 * notification to TCP layer.
    429 		 */
    430 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    431 		if (ctlfunc)
    432 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
    433 		break;
    434 
    435 	badcode:
    436 		icmpstat.icps_badcode++;
    437 		break;
    438 
    439 	case ICMP_ECHO:
    440 		icp->icmp_type = ICMP_ECHOREPLY;
    441 		goto reflect;
    442 
    443 	case ICMP_TSTAMP:
    444 		if (icmplen < ICMP_TSLEN) {
    445 			icmpstat.icps_badlen++;
    446 			break;
    447 		}
    448 		icp->icmp_type = ICMP_TSTAMPREPLY;
    449 		icp->icmp_rtime = iptime();
    450 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    451 		goto reflect;
    452 
    453 	case ICMP_MASKREQ:
    454 		if (icmpmaskrepl == 0)
    455 			break;
    456 		/*
    457 		 * We are not able to respond with all ones broadcast
    458 		 * unless we receive it over a point-to-point interface.
    459 		 */
    460 		if (icmplen < ICMP_MASKLEN) {
    461 			icmpstat.icps_badlen++;
    462 			break;
    463 		}
    464 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    465 		    in_nullhost(ip->ip_dst))
    466 			icmpdst.sin_addr = ip->ip_src;
    467 		else
    468 			icmpdst.sin_addr = ip->ip_dst;
    469 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    470 		    m->m_pkthdr.rcvif));
    471 		if (ia == 0)
    472 			break;
    473 		icp->icmp_type = ICMP_MASKREPLY;
    474 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    475 		if (in_nullhost(ip->ip_src)) {
    476 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    477 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    478 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    479 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    480 		}
    481 reflect:
    482 		icmpstat.icps_reflect++;
    483 		icmpstat.icps_outhist[icp->icmp_type]++;
    484 		icmp_reflect(m);
    485 		return;
    486 
    487 	case ICMP_REDIRECT:
    488 		if (code > 3)
    489 			goto badcode;
    490 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    491 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    492 			icmpstat.icps_badlen++;
    493 			break;
    494 		}
    495 		/*
    496 		 * Short circuit routing redirects to force
    497 		 * immediate change in the kernel's routing
    498 		 * tables.  The message is also handed to anyone
    499 		 * listening on a raw socket (e.g. the routing
    500 		 * daemon for use in updating its tables).
    501 		 */
    502 		icmpgw.sin_addr = ip->ip_src;
    503 		icmpdst.sin_addr = icp->icmp_gwaddr;
    504 #ifdef	ICMPPRINTFS
    505 		if (icmpprintfs)
    506 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    507 			    icp->icmp_gwaddr);
    508 #endif
    509 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    510 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    511 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    512 		    sintosa(&icmpgw), (struct rtentry **)0);
    513 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    514 #ifdef IPSEC
    515 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    516 #endif
    517 		break;
    518 
    519 	/*
    520 	 * No kernel processing for the following;
    521 	 * just fall through to send to raw listener.
    522 	 */
    523 	case ICMP_ECHOREPLY:
    524 	case ICMP_ROUTERADVERT:
    525 	case ICMP_ROUTERSOLICIT:
    526 	case ICMP_TSTAMPREPLY:
    527 	case ICMP_IREQREPLY:
    528 	case ICMP_MASKREPLY:
    529 	default:
    530 		break;
    531 	}
    532 
    533 raw:
    534 	rip_input(m, hlen, proto);
    535 	return;
    536 
    537 freeit:
    538 	m_freem(m);
    539 	return;
    540 }
    541 
    542 /*
    543  * Reflect the ip packet back to the source
    544  */
    545 void
    546 icmp_reflect(m)
    547 	struct mbuf *m;
    548 {
    549 	register struct ip *ip = mtod(m, struct ip *);
    550 	register struct in_ifaddr *ia;
    551 	register struct ifaddr *ifa;
    552 	struct sockaddr_in *sin = 0;
    553 	struct in_addr t;
    554 	struct mbuf *opts = 0;
    555 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    556 
    557 	if (!in_canforward(ip->ip_src) &&
    558 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    559 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    560 		m_freem(m);	/* Bad return address */
    561 		goto done;	/* ip_output() will check for broadcast */
    562 	}
    563 	t = ip->ip_dst;
    564 	ip->ip_dst = ip->ip_src;
    565 	/*
    566 	 * If the incoming packet was addressed directly to us, use
    567 	 * dst as the src for the reply.  Otherwise (broadcast or
    568 	 * anonymous), use an address which corresponds to the
    569 	 * incoming interface, with a preference for the address which
    570 	 * corresponds to the route to the destination of the ICMP.
    571 	 */
    572 
    573 	/* Look for packet addressed to us */
    574 	INADDR_TO_IA(t, ia);
    575 
    576 	/* look for packet sent to broadcast address */
    577 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    578 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    579 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    580 			if (ifa->ifa_addr->sa_family != AF_INET)
    581 				continue;
    582 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    583 				ia = ifatoia(ifa);
    584 				break;
    585 			}
    586 		}
    587 	}
    588 
    589 	if (ia)
    590 		sin = &ia->ia_addr;
    591 
    592 	icmpdst.sin_addr = t;
    593 
    594 	/* if the packet is addressed somewhere else, compute the
    595 	   source address for packets routed back to the source, and
    596 	   use that, if it's an address on the interface which
    597 	   received the packet */
    598 	if (sin == (struct sockaddr_in *)0) {
    599 		struct sockaddr_in sin_dst;
    600 		struct route icmproute;
    601 		int errornum;
    602 
    603 		sin_dst.sin_family = AF_INET;
    604 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    605 		sin_dst.sin_addr = ip->ip_dst;
    606 		bzero(&icmproute, sizeof(icmproute));
    607 		errornum = 0;
    608 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    609 		/* errornum is never used */
    610 		if (icmproute.ro_rt)
    611 			RTFREE(icmproute.ro_rt);
    612 		/* check to make sure sin is a source address on rcvif */
    613 		if (sin) {
    614 			t = sin->sin_addr;
    615 			sin = (struct sockaddr_in *)0;
    616 			INADDR_TO_IA(t, ia);
    617 			while (ia) {
    618 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    619 					sin = &ia->ia_addr;
    620 					break;
    621 				}
    622 				NEXT_IA_WITH_SAME_ADDR(ia);
    623 			}
    624 		}
    625 	}
    626 
    627 	/* if it was not addressed to us, but the route doesn't go out
    628 	   the source interface, pick an address on the source
    629 	   interface.  This can happen when routing is asymmetric, or
    630 	   when the incoming packet was encapsulated */
    631 	if (sin == (struct sockaddr_in *)0) {
    632 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    633 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    634 			if (ifa->ifa_addr->sa_family != AF_INET)
    635 				continue;
    636 			sin = &(ifatoia(ifa)->ia_addr);
    637 			break;
    638 		}
    639 	}
    640 
    641 	/*
    642 	 * The following happens if the packet was not addressed to us,
    643 	 * and was received on an interface with no IP address:
    644 	 * We find the first AF_INET address on the first non-loopback
    645 	 * interface.
    646 	 */
    647 	if (sin == (struct sockaddr_in *)0)
    648 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    649 		    ia = ia->ia_list.tqe_next) {
    650 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    651 				continue;
    652 			sin = &ia->ia_addr;
    653 			break;
    654 		}
    655 
    656 	/*
    657 	 * If we still didn't find an address, punt.  We could have an
    658 	 * interface up (and receiving packets) with no address.
    659 	 */
    660 	if (sin == (struct sockaddr_in *)0) {
    661 		m_freem(m);
    662 		goto done;
    663 	}
    664 
    665 	ip->ip_src = sin->sin_addr;
    666 	ip->ip_ttl = MAXTTL;
    667 
    668 	if (optlen > 0) {
    669 		register u_char *cp;
    670 		int opt, cnt;
    671 		u_int len;
    672 
    673 		/*
    674 		 * Retrieve any source routing from the incoming packet;
    675 		 * add on any record-route or timestamp options.
    676 		 */
    677 		cp = (u_char *) (ip + 1);
    678 		if ((opts = ip_srcroute()) == 0 &&
    679 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    680 			opts->m_len = sizeof(struct in_addr);
    681 			*mtod(opts, struct in_addr *) = zeroin_addr;
    682 		}
    683 		if (opts) {
    684 #ifdef ICMPPRINTFS
    685 		    if (icmpprintfs)
    686 			    printf("icmp_reflect optlen %d rt %d => ",
    687 				optlen, opts->m_len);
    688 #endif
    689 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    690 			    opt = cp[IPOPT_OPTVAL];
    691 			    if (opt == IPOPT_EOL)
    692 				    break;
    693 			    if (opt == IPOPT_NOP)
    694 				    len = 1;
    695 			    else {
    696 				    len = cp[IPOPT_OLEN];
    697 				    if (len <= 0 || len > cnt)
    698 					    break;
    699 			    }
    700 			    /*
    701 			     * Should check for overflow, but it "can't happen"
    702 			     */
    703 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    704 				opt == IPOPT_SECURITY) {
    705 				    bcopy((caddr_t)cp,
    706 					mtod(opts, caddr_t) + opts->m_len, len);
    707 				    opts->m_len += len;
    708 			    }
    709 		    }
    710 		    /* Terminate & pad, if necessary */
    711 		    if ((cnt = opts->m_len % 4) != 0) {
    712 			    for (; cnt < 4; cnt++) {
    713 				    *(mtod(opts, caddr_t) + opts->m_len) =
    714 					IPOPT_EOL;
    715 				    opts->m_len++;
    716 			    }
    717 		    }
    718 #ifdef ICMPPRINTFS
    719 		    if (icmpprintfs)
    720 			    printf("%d\n", opts->m_len);
    721 #endif
    722 		}
    723 		/*
    724 		 * Now strip out original options by copying rest of first
    725 		 * mbuf's data back, and adjust the IP length.
    726 		 */
    727 		ip->ip_len -= optlen;
    728 		ip->ip_hl = sizeof(struct ip) >> 2;
    729 		m->m_len -= optlen;
    730 		if (m->m_flags & M_PKTHDR)
    731 			m->m_pkthdr.len -= optlen;
    732 		optlen += sizeof(struct ip);
    733 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    734 			 (unsigned)(m->m_len - sizeof(struct ip)));
    735 	}
    736 	m->m_flags &= ~(M_BCAST|M_MCAST);
    737 	icmp_send(m, opts);
    738 done:
    739 	if (opts)
    740 		(void)m_free(opts);
    741 }
    742 
    743 /*
    744  * Send an icmp packet back to the ip level,
    745  * after supplying a checksum.
    746  */
    747 void
    748 icmp_send(m, opts)
    749 	register struct mbuf *m;
    750 	struct mbuf *opts;
    751 {
    752 	register struct ip *ip = mtod(m, struct ip *);
    753 	register int hlen;
    754 	register struct icmp *icp;
    755 
    756 	hlen = ip->ip_hl << 2;
    757 	m->m_data += hlen;
    758 	m->m_len -= hlen;
    759 	icp = mtod(m, struct icmp *);
    760 	icp->icmp_cksum = 0;
    761 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    762 	m->m_data -= hlen;
    763 	m->m_len += hlen;
    764 #ifdef ICMPPRINTFS
    765 	if (icmpprintfs)
    766 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    767 #endif
    768 #ifdef IPSEC
    769 	/* Don't lookup socket */
    770 	ipsec_setsocket(m, NULL);
    771 #endif
    772 	(void) ip_output(m, opts, NULL, 0, NULL);
    773 }
    774 
    775 n_time
    776 iptime()
    777 {
    778 	struct timeval atv;
    779 	u_long t;
    780 
    781 	microtime(&atv);
    782 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    783 	return (htonl(t));
    784 }
    785 
    786 int
    787 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    788 	int *name;
    789 	u_int namelen;
    790 	void *oldp;
    791 	size_t *oldlenp;
    792 	void *newp;
    793 	size_t newlen;
    794 {
    795 
    796 	/* All sysctl names at this level are terminal. */
    797 	if (namelen != 1)
    798 		return (ENOTDIR);
    799 
    800 	switch (name[0]) {
    801 	case ICMPCTL_MASKREPL:
    802 		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
    803 	case ICMPCTL_ERRRATELIMIT:
    804 	    {
    805 		int rate_usec, error, s;
    806 
    807 		/*
    808 		 * The sysctl specifies the rate in usec-between-icmp,
    809 		 * so we must convert from/to a timeval.
    810 		 */
    811 		rate_usec = (icmperrratelim.tv_sec * 1000000) +
    812 		    icmperrratelim.tv_usec;
    813 		error = sysctl_int(oldp, oldlenp, newp, newlen, &rate_usec);
    814 		if (error)
    815 			return (error);
    816 		s = splsoftnet();
    817 		icmperrratelim.tv_sec = rate_usec / 1000000;
    818 		icmperrratelim.tv_usec = rate_usec % 1000000;
    819 		splx(s);
    820 
    821 		return (0);
    822 	    }
    823 	default:
    824 		return (ENOPROTOOPT);
    825 	}
    826 	/* NOTREACHED */
    827 }
    828 
    829 static void
    830 icmp_mtudisc(icp)
    831 	struct icmp *icp;
    832 {
    833 	struct rtentry *rt;
    834 	struct sockaddr *dst = sintosa(&icmpsrc);
    835 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    836 	int    error;
    837 
    838 	/* Table of common MTUs: */
    839 
    840 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    841 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    842 
    843 	rt = rtalloc1(dst, 1);
    844 	if (rt == 0)
    845 		return;
    846 
    847 	/* If we didn't get a host route, allocate one */
    848 
    849 	if ((rt->rt_flags & RTF_HOST) == 0) {
    850 		struct rtentry *nrt;
    851 
    852 		error = rtrequest((int) RTM_ADD, dst,
    853 		    (struct sockaddr *) rt->rt_gateway,
    854 		    (struct sockaddr *) 0,
    855 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    856 		if (error) {
    857 			rtfree(rt);
    858 			rtfree(nrt);
    859 			return;
    860 		}
    861 		nrt->rt_rmx = rt->rt_rmx;
    862 		rtfree(rt);
    863 		rt = nrt;
    864 	}
    865 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    866 	if (error) {
    867 		rtfree(rt);
    868 		return;
    869 	}
    870 
    871 	if (mtu == 0) {
    872 		int i = 0;
    873 
    874 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    875 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    876 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    877 			mtu -= (icp->icmp_ip.ip_hl << 2);
    878 
    879 		/* If we still can't guess a value, try the route */
    880 
    881 		if (mtu == 0) {
    882 			mtu = rt->rt_rmx.rmx_mtu;
    883 
    884 			/* If no route mtu, default to the interface mtu */
    885 
    886 			if (mtu == 0)
    887 				mtu = rt->rt_ifp->if_mtu;
    888 		}
    889 
    890 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    891 			if (mtu > mtu_table[i]) {
    892 				mtu = mtu_table[i];
    893 				break;
    894 			}
    895 	}
    896 
    897 	/*
    898 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    899 	 *	  to turn off PMTU for a route, and the kernel can
    900 	 *	  set it to indicate a serious problem with PMTU
    901 	 *	  on a route.  We should be using a separate flag
    902 	 *	  for the kernel to indicate this.
    903 	 */
    904 
    905 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    906 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    907 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    908 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    909 			 rt->rt_rmx.rmx_mtu == 0)
    910 			rt->rt_rmx.rmx_mtu = mtu;
    911 	}
    912 
    913 	if (rt)
    914 		rtfree(rt);
    915 }
    916 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no plateau in the requested direction, and
 * also when MTU is negative (it then matches no table entry at all).
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nplateaus = (int)(sizeof(mtutab) / sizeof(mtutab[0]));
	int i;

	/* Find the first (largest) plateau that does not exceed mtu. */
	for (i = 0; i < nplateaus; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	/*
	 * Only a negative mtu gets here: even the terminating 0 did
	 * not match.  Do not index past the end of the table.
	 */
	if (i == nplateaus)
		return 0;

	if (dir < 0) {
		/* Larger plateau: the entry just before, if any. */
		if (i == 0) {
			return 0;
		} else {
			return mtutab[i - 1];
		}
	} else {
		if (mtutab[i] == 0) {
			/* Already below the smallest plateau. */
			return 0;
		} else if (mtu > mtutab[i]) {
			/* mtu lies between plateaus: round down. */
			return mtutab[i];
		} else {
			/* mtu is exactly a plateau: step to the next one. */
			return mtutab[i + 1];
		}
	}
}
    954 
    955 static void
    956 icmp_mtudisc_timeout(rt, r)
    957 	struct rtentry *rt;
    958 	struct rttimer *r;
    959 {
    960 	if (rt == NULL)
    961 		panic("icmp_mtudisc_timeout:  bad route to timeout");
    962 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
    963 	    (RTF_DYNAMIC | RTF_HOST)) {
    964 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
    965 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
    966 	} else {
    967 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    968 			rt->rt_rmx.rmx_mtu = 0;
    969 		}
    970 	}
    971 }
    972 
    973 /*
    974  * Perform rate limit check.
    975  * Returns 0 if it is okay to send the icmp packet.
    976  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
    977  * limitation.
    978  *
    979  * XXX per-destination/type check necessary?
    980  */
    981 static int
    982 icmp_ratelimit(dst, type, code)
    983 	const struct in_addr *dst;	/* not used at this moment */
    984 	const int type;			/* not used at this moment */
    985 	const int code;			/* not used at this moment */
    986 {
    987 	static struct timeval icmperrratelim_last;
    988 
    989 	/*
    990 	 * ratecheck() returns true if it is okay to send.  We return
    991 	 * true if it is not okay to send.
    992 	 */
    993 	return (ratecheck(&icmperrratelim_last, &icmperrratelim) == 0);
    994 }
    995