Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.41
      1 /*	$NetBSD: ip_icmp.c,v 1.41 2000/02/17 10:59:35 darrenr Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * Redistribution and use in source and binary forms, with or without
     41  * modification, are permitted provided that the following conditions
     42  * are met:
     43  * 1. Redistributions of source code must retain the above copyright
     44  *    notice, this list of conditions and the following disclaimer.
     45  * 2. Redistributions in binary form must reproduce the above copyright
     46  *    notice, this list of conditions and the following disclaimer in the
     47  *    documentation and/or other materials provided with the distribution.
     48  * 3. All advertising materials mentioning features or use of this software
     49  *    must display the following acknowledgement:
     50  *	This product includes software developed by the NetBSD
     51  *	Foundation, Inc. and its contributors.
     52  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  *    contributors may be used to endorse or promote products derived
     54  *    from this software without specific prior written permission.
     55  *
     56  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  * POSSIBILITY OF SUCH DAMAGE.
     67  */
     68 
     69 /*
     70  * Copyright (c) 1982, 1986, 1988, 1993
     71  *	The Regents of the University of California.  All rights reserved.
     72  *
     73  * Redistribution and use in source and binary forms, with or without
     74  * modification, are permitted provided that the following conditions
     75  * are met:
     76  * 1. Redistributions of source code must retain the above copyright
     77  *    notice, this list of conditions and the following disclaimer.
     78  * 2. Redistributions in binary form must reproduce the above copyright
     79  *    notice, this list of conditions and the following disclaimer in the
     80  *    documentation and/or other materials provided with the distribution.
     81  * 3. All advertising materials mentioning features or use of this software
     82  *    must display the following acknowledgement:
     83  *	This product includes software developed by the University of
     84  *	California, Berkeley and its contributors.
     85  * 4. Neither the name of the University nor the names of its contributors
     86  *    may be used to endorse or promote products derived from this software
     87  *    without specific prior written permission.
     88  *
     89  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99  * SUCH DAMAGE.
    100  *
    101  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    102  */
    103 
    104 #include "opt_ipsec.h"
    105 
    106 #include <sys/param.h>
    107 #include <sys/systm.h>
    108 #include <sys/malloc.h>
    109 #include <sys/mbuf.h>
    110 #include <sys/protosw.h>
    111 #include <sys/socket.h>
    112 #include <sys/time.h>
    113 #include <sys/kernel.h>
    114 #include <sys/proc.h>
    115 
    116 #include <vm/vm.h>
    117 #include <sys/sysctl.h>
    118 
    119 #include <net/if.h>
    120 #include <net/route.h>
    121 
    122 #include <netinet/in.h>
    123 #include <netinet/in_systm.h>
    124 #include <netinet/in_var.h>
    125 #include <netinet/ip.h>
    126 #include <netinet/ip_icmp.h>
    127 #include <netinet/ip_var.h>
    128 #include <netinet/in_pcb.h>
    129 #include <netinet/icmp_var.h>
    130 
    131 #ifdef IPSEC
    132 #include <netinet6/ipsec.h>
    133 #include <netkey/key.h>
    134 #include <netkey/key_debug.h>
    135 #endif
    136 
    137 #include <machine/stdarg.h>
    138 
/*
 * ICMP routines: error generation, processing of received packets,
 * routines to turn packets around and send them back to the
 * originator, and host table maintenance routines.
 */
    144 
    145 int	icmpmaskrepl = 0;
    146 #ifdef ICMPPRINTFS
    147 int	icmpprintfs = 0;
    148 #endif
    149 
    150 #if 0
    151 static int	ip_next_mtu __P((int, int));
    152 #else
    153 /*static*/ int	ip_next_mtu __P((int, int));
    154 #endif
    155 
    156 extern	struct timeval icmperrratelim;
    157 
    158 static void icmp_mtudisc __P((struct icmp *));
    159 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    160 
    161 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    162 
    163 /*
    164  * Generate an error packet of type error
    165  * in response to bad packet ip.
    166  */
    167 void
    168 icmp_error(n, type, code, dest, destifp)
    169 	struct mbuf *n;
    170 	int type, code;
    171 	n_long dest;
    172 	struct ifnet *destifp;
    173 {
    174 	register struct ip *oip = mtod(n, struct ip *), *nip;
    175 	register unsigned oiplen = oip->ip_hl << 2;
    176 	register struct icmp *icp;
    177 	register struct mbuf *m;
    178 	unsigned icmplen;
    179 
    180 #ifdef ICMPPRINTFS
    181 	if (icmpprintfs)
    182 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    183 #endif
    184 	if (type != ICMP_REDIRECT)
    185 		icmpstat.icps_error++;
    186 	/*
    187 	 * Don't send error if not the first fragment of message.
    188 	 * Don't error if the old packet protocol was ICMP
    189 	 * error message, only known informational types.
    190 	 */
    191 	if (oip->ip_off &~ (IP_MF|IP_DF))
    192 		goto freeit;
    193 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    194 	  n->m_len >= oiplen + ICMP_MINLEN &&
    195 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    196 		icmpstat.icps_oldicmp++;
    197 		goto freeit;
    198 	}
    199 	/* Don't send error in response to a multicast or broadcast packet */
    200 	if (n->m_flags & (M_BCAST|M_MCAST))
    201 		goto freeit;
    202 
    203 	/*
    204 	 * First, do a rate limitation check.
    205 	 */
    206 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    207 		/* XXX stat */
    208 		goto freeit;
    209 	}
    210 
    211 	/*
    212 	 * Now, formulate icmp message
    213 	 */
    214 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    215 	if (m == NULL)
    216 		goto freeit;
    217 	icmplen = oiplen + min(8, oip->ip_len - oiplen);
    218 	m->m_len = icmplen + ICMP_MINLEN;
    219 	MH_ALIGN(m, m->m_len);
    220 	icp = mtod(m, struct icmp *);
    221 	if ((u_int)type > ICMP_MAXTYPE)
    222 		panic("icmp_error");
    223 	icmpstat.icps_outhist[type]++;
    224 	icp->icmp_type = type;
    225 	if (type == ICMP_REDIRECT)
    226 		icp->icmp_gwaddr.s_addr = dest;
    227 	else {
    228 		icp->icmp_void = 0;
    229 		/*
    230 		 * The following assignments assume an overlay with the
    231 		 * zeroed icmp_void field.
    232 		 */
    233 		if (type == ICMP_PARAMPROB) {
    234 			icp->icmp_pptr = code;
    235 			code = 0;
    236 		} else if (type == ICMP_UNREACH &&
    237 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    238 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    239 	}
    240 
    241 	HTONS(oip->ip_off);
    242 	HTONS(oip->ip_len);
    243 	icp->icmp_code = code;
    244 	bcopy((caddr_t)oip, (caddr_t)&icp->icmp_ip, icmplen);
    245 	nip = &icp->icmp_ip;
    246 
    247 	/*
    248 	 * Now, copy old ip header (without options)
    249 	 * in front of icmp message.
    250 	 */
    251 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    252 		panic("icmp len");
    253 	m->m_data -= sizeof(struct ip);
    254 	m->m_len += sizeof(struct ip);
    255 	m->m_pkthdr.len = m->m_len;
    256 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    257 	nip = mtod(m, struct ip *);
    258 	bcopy((caddr_t)oip, (caddr_t)nip, sizeof(struct ip));
    259 	nip->ip_len = m->m_len;
    260 	nip->ip_hl = sizeof(struct ip) >> 2;
    261 	nip->ip_p = IPPROTO_ICMP;
    262 	nip->ip_tos = 0;
    263 	icmp_reflect(m);
    264 
    265 freeit:
    266 	m_freem(n);
    267 }
    268 
    269 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };
    270 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
    271 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
    272 struct sockaddr_in icmpmask = { 8, 0 };
    273 
    274 /*
    275  * Process a received ICMP message.
    276  */
    277 void
    278 #if __STDC__
    279 icmp_input(struct mbuf *m, ...)
    280 #else
    281 icmp_input(m, va_alist)
    282 	struct mbuf *m;
    283 	va_dcl
    284 #endif
    285 {
    286 	int proto;
    287 	register struct icmp *icp;
    288 	register struct ip *ip = mtod(m, struct ip *);
    289 	int icmplen;
    290 	register int i;
    291 	struct in_ifaddr *ia;
    292 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    293 	int code;
    294 	int hlen;
    295 	va_list ap;
    296 
    297 	va_start(ap, m);
    298 	hlen = va_arg(ap, int);
    299 	proto = va_arg(ap, int);
    300 	va_end(ap);
    301 
    302 	/*
    303 	 * Locate icmp structure in mbuf, and check
    304 	 * that not corrupted and of at least minimum length.
    305 	 */
    306 	icmplen = ip->ip_len - hlen;
    307 #ifdef ICMPPRINTFS
    308 	if (icmpprintfs)
    309 		printf("icmp_input from %x to %x, len %d\n",
    310 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    311 		    icmplen);
    312 #endif
    313 	if (icmplen < ICMP_MINLEN) {
    314 		icmpstat.icps_tooshort++;
    315 		goto freeit;
    316 	}
    317 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    318 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    319 		icmpstat.icps_tooshort++;
    320 		return;
    321 	}
    322 	ip = mtod(m, struct ip *);
    323 	m->m_len -= hlen;
    324 	m->m_data += hlen;
    325 	icp = mtod(m, struct icmp *);
    326 	if (in_cksum(m, icmplen)) {
    327 		icmpstat.icps_checksum++;
    328 		goto freeit;
    329 	}
    330 	m->m_len += hlen;
    331 	m->m_data -= hlen;
    332 
    333 #ifdef ICMPPRINTFS
    334 	/*
    335 	 * Message type specific processing.
    336 	 */
    337 	if (icmpprintfs)
    338 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    339 		    icp->icmp_code);
    340 #endif
    341 #ifdef IPSEC
    342 	/* drop it if it does not match the policy */
    343 	if (ipsec4_in_reject(m, NULL)) {
    344 		ipsecstat.in_polvio++;
    345 		goto freeit;
    346 	}
    347 #endif
    348 	if (icp->icmp_type > ICMP_MAXTYPE)
    349 		goto raw;
    350 	icmpstat.icps_inhist[icp->icmp_type]++;
    351 	code = icp->icmp_code;
    352 	switch (icp->icmp_type) {
    353 
    354 	case ICMP_UNREACH:
    355 		switch (code) {
    356 			case ICMP_UNREACH_NET:
    357 			case ICMP_UNREACH_HOST:
    358 			case ICMP_UNREACH_PROTOCOL:
    359 			case ICMP_UNREACH_PORT:
    360 			case ICMP_UNREACH_SRCFAIL:
    361 				code += PRC_UNREACH_NET;
    362 				break;
    363 
    364 			case ICMP_UNREACH_NEEDFRAG:
    365 				code = PRC_MSGSIZE;
    366 				break;
    367 
    368 			case ICMP_UNREACH_NET_UNKNOWN:
    369 			case ICMP_UNREACH_NET_PROHIB:
    370 			case ICMP_UNREACH_TOSNET:
    371 				code = PRC_UNREACH_NET;
    372 				break;
    373 
    374 			case ICMP_UNREACH_HOST_UNKNOWN:
    375 			case ICMP_UNREACH_ISOLATED:
    376 			case ICMP_UNREACH_HOST_PROHIB:
    377 			case ICMP_UNREACH_TOSHOST:
    378 				code = PRC_UNREACH_HOST;
    379 				break;
    380 
    381 			default:
    382 				goto badcode;
    383 		}
    384 		goto deliver;
    385 
    386 	case ICMP_TIMXCEED:
    387 		if (code > 1)
    388 			goto badcode;
    389 		code += PRC_TIMXCEED_INTRANS;
    390 		goto deliver;
    391 
    392 	case ICMP_PARAMPROB:
    393 		if (code > 1)
    394 			goto badcode;
    395 		code = PRC_PARAMPROB;
    396 		goto deliver;
    397 
    398 	case ICMP_SOURCEQUENCH:
    399 		if (code)
    400 			goto badcode;
    401 		code = PRC_QUENCH;
    402 		goto deliver;
    403 
    404 	deliver:
    405 		/*
    406 		 * Problem with datagram; advise higher level routines.
    407 		 */
    408 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    409 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    410 			icmpstat.icps_badlen++;
    411 			goto freeit;
    412 		}
    413 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    414 			goto badcode;
    415 		NTOHS(icp->icmp_ip.ip_len);
    416 #ifdef ICMPPRINTFS
    417 		if (icmpprintfs)
    418 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    419 #endif
    420 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    421 		if (code == PRC_MSGSIZE && ip_mtudisc)
    422 			icmp_mtudisc(icp);
    423 		/*
    424 		 * XXX if the packet contains [IPv4 AH TCP], we can't make a
    425 		 * notification to TCP layer.
    426 		 */
    427 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    428 		if (ctlfunc)
    429 			(*ctlfunc)(code, sintosa(&icmpsrc), &icp->icmp_ip);
    430 		break;
    431 
    432 	badcode:
    433 		icmpstat.icps_badcode++;
    434 		break;
    435 
    436 	case ICMP_ECHO:
    437 		icp->icmp_type = ICMP_ECHOREPLY;
    438 		goto reflect;
    439 
    440 	case ICMP_TSTAMP:
    441 		if (icmplen < ICMP_TSLEN) {
    442 			icmpstat.icps_badlen++;
    443 			break;
    444 		}
    445 		icp->icmp_type = ICMP_TSTAMPREPLY;
    446 		icp->icmp_rtime = iptime();
    447 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    448 		goto reflect;
    449 
    450 	case ICMP_MASKREQ:
    451 		if (icmpmaskrepl == 0)
    452 			break;
    453 		/*
    454 		 * We are not able to respond with all ones broadcast
    455 		 * unless we receive it over a point-to-point interface.
    456 		 */
    457 		if (icmplen < ICMP_MASKLEN) {
    458 			icmpstat.icps_badlen++;
    459 			break;
    460 		}
    461 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    462 		    in_nullhost(ip->ip_dst))
    463 			icmpdst.sin_addr = ip->ip_src;
    464 		else
    465 			icmpdst.sin_addr = ip->ip_dst;
    466 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    467 		    m->m_pkthdr.rcvif));
    468 		if (ia == 0)
    469 			break;
    470 		icp->icmp_type = ICMP_MASKREPLY;
    471 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
    472 		if (in_nullhost(ip->ip_src)) {
    473 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    474 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    475 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    476 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    477 		}
    478 reflect:
    479 		icmpstat.icps_reflect++;
    480 		icmpstat.icps_outhist[icp->icmp_type]++;
    481 		icmp_reflect(m);
    482 		return;
    483 
    484 	case ICMP_REDIRECT:
    485 		if (code > 3)
    486 			goto badcode;
    487 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    488 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    489 			icmpstat.icps_badlen++;
    490 			break;
    491 		}
    492 		/*
    493 		 * Short circuit routing redirects to force
    494 		 * immediate change in the kernel's routing
    495 		 * tables.  The message is also handed to anyone
    496 		 * listening on a raw socket (e.g. the routing
    497 		 * daemon for use in updating its tables).
    498 		 */
    499 		icmpgw.sin_addr = ip->ip_src;
    500 		icmpdst.sin_addr = icp->icmp_gwaddr;
    501 #ifdef	ICMPPRINTFS
    502 		if (icmpprintfs)
    503 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    504 			    icp->icmp_gwaddr);
    505 #endif
    506 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    507 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    508 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    509 		    sintosa(&icmpgw), (struct rtentry **)0);
    510 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    511 #ifdef IPSEC
    512 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    513 #endif
    514 		break;
    515 
    516 	/*
    517 	 * No kernel processing for the following;
    518 	 * just fall through to send to raw listener.
    519 	 */
    520 	case ICMP_ECHOREPLY:
    521 	case ICMP_ROUTERADVERT:
    522 	case ICMP_ROUTERSOLICIT:
    523 	case ICMP_TSTAMPREPLY:
    524 	case ICMP_IREQREPLY:
    525 	case ICMP_MASKREPLY:
    526 	default:
    527 		break;
    528 	}
    529 
    530 raw:
    531 	rip_input(m, hlen, proto);
    532 	return;
    533 
    534 freeit:
    535 	m_freem(m);
    536 	return;
    537 }
    538 
    539 /*
    540  * Reflect the ip packet back to the source
    541  */
    542 void
    543 icmp_reflect(m)
    544 	struct mbuf *m;
    545 {
    546 	register struct ip *ip = mtod(m, struct ip *);
    547 	register struct in_ifaddr *ia;
    548 	register struct ifaddr *ifa;
    549 	struct sockaddr_in *sin = 0;
    550 	struct in_addr t;
    551 	struct mbuf *opts = 0;
    552 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    553 
    554 	if (!in_canforward(ip->ip_src) &&
    555 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    556 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    557 		m_freem(m);	/* Bad return address */
    558 		goto done;	/* ip_output() will check for broadcast */
    559 	}
    560 	t = ip->ip_dst;
    561 	ip->ip_dst = ip->ip_src;
    562 	/*
    563 	 * If the incoming packet was addressed directly to us, use
    564 	 * dst as the src for the reply.  Otherwise (broadcast or
    565 	 * anonymous), use an address which corresponds to the
    566 	 * incoming interface, with a preference for the address which
    567 	 * corresponds to the route to the destination of the ICMP.
    568 	 */
    569 
    570 	/* Look for packet addressed to us */
    571 	INADDR_TO_IA(t, ia);
    572 
    573 	/* look for packet sent to broadcast address */
    574 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    575 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    576 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    577 			if (ifa->ifa_addr->sa_family != AF_INET)
    578 				continue;
    579 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    580 				ia = ifatoia(ifa);
    581 				break;
    582 			}
    583 		}
    584 	}
    585 
    586 	if (ia)
    587 		sin = &ia->ia_addr;
    588 
    589 	icmpdst.sin_addr = t;
    590 
    591 	/* if the packet is addressed somewhere else, compute the
    592 	   source address for packets routed back to the source, and
    593 	   use that, if it's an address on the interface which
    594 	   received the packet */
    595 	if (sin == (struct sockaddr_in *)0) {
    596 		struct sockaddr_in sin_dst;
    597 		struct route icmproute;
    598 		int errornum;
    599 
    600 		sin_dst.sin_family = AF_INET;
    601 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    602 		sin_dst.sin_addr = ip->ip_dst;
    603 		bzero(&icmproute, sizeof(icmproute));
    604 		errornum = 0;
    605 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    606 		/* errornum is never used */
    607 		if (icmproute.ro_rt)
    608 			RTFREE(icmproute.ro_rt);
    609 		/* check to make sure sin is a source address on rcvif */
    610 		if (sin) {
    611 			t = sin->sin_addr;
    612 			sin = (struct sockaddr_in *)0;
    613 			INADDR_TO_IA(t, ia);
    614 			while (ia) {
    615 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    616 					sin = &ia->ia_addr;
    617 					break;
    618 				}
    619 				NEXT_IA_WITH_SAME_ADDR(ia);
    620 			}
    621 		}
    622 	}
    623 
    624 	/* if it was not addressed to us, but the route doesn't go out
    625 	   the source interface, pick an address on the source
    626 	   interface.  This can happen when routing is asymmetric, or
    627 	   when the incoming packet was encapsulated */
    628 	if (sin == (struct sockaddr_in *)0) {
    629 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    630 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    631 			if (ifa->ifa_addr->sa_family != AF_INET)
    632 				continue;
    633 			sin = &(ifatoia(ifa)->ia_addr);
    634 			break;
    635 		}
    636 	}
    637 
    638 	/*
    639 	 * The following happens if the packet was not addressed to us,
    640 	 * and was received on an interface with no IP address:
    641 	 * We find the first AF_INET address on the first non-loopback
    642 	 * interface.
    643 	 */
    644 	if (sin == (struct sockaddr_in *)0)
    645 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    646 		    ia = ia->ia_list.tqe_next) {
    647 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    648 				continue;
    649 			sin = &ia->ia_addr;
    650 			break;
    651 		}
    652 
    653 	/*
    654 	 * If we still didn't find an address, punt.  We could have an
    655 	 * interface up (and receiving packets) with no address.
    656 	 */
    657 	if (sin == (struct sockaddr_in *)0) {
    658 		m_freem(m);
    659 		goto done;
    660 	}
    661 
    662 	ip->ip_src = sin->sin_addr;
    663 	ip->ip_ttl = MAXTTL;
    664 
    665 	if (optlen > 0) {
    666 		register u_char *cp;
    667 		int opt, cnt;
    668 		u_int len;
    669 
    670 		/*
    671 		 * Retrieve any source routing from the incoming packet;
    672 		 * add on any record-route or timestamp options.
    673 		 */
    674 		cp = (u_char *) (ip + 1);
    675 		if ((opts = ip_srcroute()) == 0 &&
    676 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    677 			opts->m_len = sizeof(struct in_addr);
    678 			*mtod(opts, struct in_addr *) = zeroin_addr;
    679 		}
    680 		if (opts) {
    681 #ifdef ICMPPRINTFS
    682 		    if (icmpprintfs)
    683 			    printf("icmp_reflect optlen %d rt %d => ",
    684 				optlen, opts->m_len);
    685 #endif
    686 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    687 			    opt = cp[IPOPT_OPTVAL];
    688 			    if (opt == IPOPT_EOL)
    689 				    break;
    690 			    if (opt == IPOPT_NOP)
    691 				    len = 1;
    692 			    else {
    693 				    len = cp[IPOPT_OLEN];
    694 				    if (len <= 0 || len > cnt)
    695 					    break;
    696 			    }
    697 			    /*
    698 			     * Should check for overflow, but it "can't happen"
    699 			     */
    700 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    701 				opt == IPOPT_SECURITY) {
    702 				    bcopy((caddr_t)cp,
    703 					mtod(opts, caddr_t) + opts->m_len, len);
    704 				    opts->m_len += len;
    705 			    }
    706 		    }
    707 		    /* Terminate & pad, if necessary */
    708 		    if ((cnt = opts->m_len % 4) != 0) {
    709 			    for (; cnt < 4; cnt++) {
    710 				    *(mtod(opts, caddr_t) + opts->m_len) =
    711 					IPOPT_EOL;
    712 				    opts->m_len++;
    713 			    }
    714 		    }
    715 #ifdef ICMPPRINTFS
    716 		    if (icmpprintfs)
    717 			    printf("%d\n", opts->m_len);
    718 #endif
    719 		}
    720 		/*
    721 		 * Now strip out original options by copying rest of first
    722 		 * mbuf's data back, and adjust the IP length.
    723 		 */
    724 		ip->ip_len -= optlen;
    725 		ip->ip_hl = sizeof(struct ip) >> 2;
    726 		m->m_len -= optlen;
    727 		if (m->m_flags & M_PKTHDR)
    728 			m->m_pkthdr.len -= optlen;
    729 		optlen += sizeof(struct ip);
    730 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    731 			 (unsigned)(m->m_len - sizeof(struct ip)));
    732 	}
    733 	m->m_flags &= ~(M_BCAST|M_MCAST);
    734 	icmp_send(m, opts);
    735 done:
    736 	if (opts)
    737 		(void)m_free(opts);
    738 }
    739 
    740 /*
    741  * Send an icmp packet back to the ip level,
    742  * after supplying a checksum.
    743  */
    744 void
    745 icmp_send(m, opts)
    746 	register struct mbuf *m;
    747 	struct mbuf *opts;
    748 {
    749 	register struct ip *ip = mtod(m, struct ip *);
    750 	register int hlen;
    751 	register struct icmp *icp;
    752 
    753 	hlen = ip->ip_hl << 2;
    754 	m->m_data += hlen;
    755 	m->m_len -= hlen;
    756 	icp = mtod(m, struct icmp *);
    757 	icp->icmp_cksum = 0;
    758 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    759 	m->m_data -= hlen;
    760 	m->m_len += hlen;
    761 #ifdef ICMPPRINTFS
    762 	if (icmpprintfs)
    763 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    764 #endif
    765 #ifdef IPSEC
    766 	m->m_pkthdr.rcvif = NULL;
    767 #endif /*IPSEC*/
    768 	(void) ip_output(m, opts, NULL, 0, NULL);
    769 }
    770 
    771 n_time
    772 iptime()
    773 {
    774 	struct timeval atv;
    775 	u_long t;
    776 
    777 	microtime(&atv);
    778 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    779 	return (htonl(t));
    780 }
    781 
    782 int
    783 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    784 	int *name;
    785 	u_int namelen;
    786 	void *oldp;
    787 	size_t *oldlenp;
    788 	void *newp;
    789 	size_t newlen;
    790 {
    791 
    792 	/* All sysctl names at this level are terminal. */
    793 	if (namelen != 1)
    794 		return (ENOTDIR);
    795 
    796 	switch (name[0]) {
    797 	case ICMPCTL_MASKREPL:
    798 		return (sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl));
    799 	case ICMPCTL_ERRRATELIMIT:
    800 	    {
    801 		int rate_usec, error, s;
    802 
    803 		/*
    804 		 * The sysctl specifies the rate in usec-between-icmp,
    805 		 * so we must convert from/to a timeval.
    806 		 */
    807 		rate_usec = (icmperrratelim.tv_sec * 1000000) +
    808 		    icmperrratelim.tv_usec;
    809 		error = sysctl_int(oldp, oldlenp, newp, newlen, &rate_usec);
    810 		if (error)
    811 			return (error);
    812 		s = splsoftnet();
    813 		icmperrratelim.tv_sec = rate_usec / 1000000;
    814 		icmperrratelim.tv_usec = rate_usec % 1000000;
    815 		splx(s);
    816 
    817 		return (0);
    818 	    }
    819 	default:
    820 		return (ENOPROTOOPT);
    821 	}
    822 	/* NOTREACHED */
    823 }
    824 
    825 static void
    826 icmp_mtudisc(icp)
    827 	struct icmp *icp;
    828 {
    829 	struct rtentry *rt;
    830 	struct sockaddr *dst = sintosa(&icmpsrc);
    831 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    832 	int    error;
    833 
    834 	/* Table of common MTUs: */
    835 
    836 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    837 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    838 
    839 	rt = rtalloc1(dst, 1);
    840 	if (rt == 0)
    841 		return;
    842 
    843 	/* If we didn't get a host route, allocate one */
    844 
    845 	if ((rt->rt_flags & RTF_HOST) == 0) {
    846 		struct rtentry *nrt;
    847 
    848 		error = rtrequest((int) RTM_ADD, dst,
    849 		    (struct sockaddr *) rt->rt_gateway,
    850 		    (struct sockaddr *) 0,
    851 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    852 		if (error) {
    853 			rtfree(rt);
    854 			rtfree(nrt);
    855 			return;
    856 		}
    857 		nrt->rt_rmx = rt->rt_rmx;
    858 		rtfree(rt);
    859 		rt = nrt;
    860 	}
    861 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    862 	if (error) {
    863 		rtfree(rt);
    864 		return;
    865 	}
    866 
    867 	if (mtu == 0) {
    868 		int i = 0;
    869 
    870 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    871 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    872 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    873 			mtu -= (icp->icmp_ip.ip_hl << 2);
    874 
    875 		/* If we still can't guess a value, try the route */
    876 
    877 		if (mtu == 0) {
    878 			mtu = rt->rt_rmx.rmx_mtu;
    879 
    880 			/* If no route mtu, default to the interface mtu */
    881 
    882 			if (mtu == 0)
    883 				mtu = rt->rt_ifp->if_mtu;
    884 		}
    885 
    886 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    887 			if (mtu > mtu_table[i]) {
    888 				mtu = mtu_table[i];
    889 				break;
    890 			}
    891 	}
    892 
    893 	/*
    894 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    895 	 *	  to turn off PMTU for a route, and the kernel can
    896 	 *	  set it to indicate a serious problem with PMTU
    897 	 *	  on a route.  We should be using a separate flag
    898 	 *	  for the kernel to indicate this.
    899 	 */
    900 
    901 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    902 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    903 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    904 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    905 			 rt->rt_rmx.rmx_mtu == 0)
    906 			rt->rt_rmx.rmx_mtu = mtu;
    907 	}
    908 
    909 	if (rt)
    910 		rtfree(rt);
    911 }
    912 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * Returns 0 when there is no such plateau: MTU is at or above the
 * largest entry and a larger one was asked for, MTU is at or below the
 * smallest entry and a smaller one was asked for, or MTU is negative.
 * When a smaller value is requested and MTU lies strictly between two
 * plateaus, the plateau just below MTU is returned.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Descending plateaus; the trailing 0 is a sentinel. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int nent = (sizeof mtutab) / (sizeof mtutab[0]);
	int i;

	/* Find the largest plateau that does not exceed mtu. */
	for (i = 0; i < nent; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	/*
	 * A negative mtu matches nothing, not even the 0 sentinel.  Stop
	 * here rather than index past the end of the table.
	 */
	if (i == nent)
		return 0;

	if (dir < 0) {
		/* Larger plateau wanted; nothing lies above the first entry. */
		if (i == 0)
			return 0;
		return mtutab[i - 1];
	}

	/* Smaller value wanted. */
	if (mtutab[i] == 0)
		return 0;
	if (mtu > mtutab[i])
		return mtutab[i];
	return mtutab[i + 1];
}
    950 
    951 static void
    952 icmp_mtudisc_timeout(rt, r)
    953 	struct rtentry *rt;
    954 	struct rttimer *r;
    955 {
    956 	if (rt == NULL)
    957 		panic("icmp_mtudisc_timeout:  bad route to timeout");
    958 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
    959 	    (RTF_DYNAMIC | RTF_HOST)) {
    960 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
    961 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
    962 	} else {
    963 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    964 			rt->rt_rmx.rmx_mtu = 0;
    965 		}
    966 	}
    967 }
    968 
    969 /*
    970  * Perform rate limit check.
    971  * Returns 0 if it is okay to send the icmp packet.
    972  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
    973  * limitation.
    974  *
    975  * XXX per-destination/type check necessary?
    976  */
    977 static int
    978 icmp_ratelimit(dst, type, code)
    979 	const struct in_addr *dst;	/* not used at this moment */
    980 	const int type;			/* not used at this moment */
    981 	const int code;			/* not used at this moment */
    982 {
    983 	static struct timeval icmperrratelim_last;
    984 
    985 	/*
    986 	 * ratecheck() returns true if it is okay to send.  We return
    987 	 * true if it is not okay to send.
    988 	 */
    989 	return (ratecheck(&icmperrratelim_last, &icmperrratelim) == 0);
    990 }
    991