Home | History | Annotate | Line # | Download | only in netinet
ip_icmp.c revision 1.55
      1 /*	$NetBSD: ip_icmp.c,v 1.55 2000/10/18 17:09:14 thorpej Exp $	*/
      2 
      3 /*
      4  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the project nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1998, 2000 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * This code is derived from software contributed to The NetBSD Foundation
     37  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  *
     40  * This code is derived from software contributed to The NetBSD Foundation
     41  * by Jason R. Thorpe of Zembu Labs, Inc.
     42  *
     43  * Redistribution and use in source and binary forms, with or without
     44  * modification, are permitted provided that the following conditions
     45  * are met:
     46  * 1. Redistributions of source code must retain the above copyright
     47  *    notice, this list of conditions and the following disclaimer.
     48  * 2. Redistributions in binary form must reproduce the above copyright
     49  *    notice, this list of conditions and the following disclaimer in the
     50  *    documentation and/or other materials provided with the distribution.
     51  * 3. All advertising materials mentioning features or use of this software
     52  *    must display the following acknowledgement:
     53  *	This product includes software developed by the NetBSD
     54  *	Foundation, Inc. and its contributors.
     55  * 4. Neither the name of The NetBSD Foundation nor the names of its
     56  *    contributors may be used to endorse or promote products derived
     57  *    from this software without specific prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     60  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     61  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     62  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     63  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     64  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     65  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     66  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     67  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     68  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     69  * POSSIBILITY OF SUCH DAMAGE.
     70  */
     71 
     72 /*
     73  * Copyright (c) 1982, 1986, 1988, 1993
     74  *	The Regents of the University of California.  All rights reserved.
     75  *
     76  * Redistribution and use in source and binary forms, with or without
     77  * modification, are permitted provided that the following conditions
     78  * are met:
     79  * 1. Redistributions of source code must retain the above copyright
     80  *    notice, this list of conditions and the following disclaimer.
     81  * 2. Redistributions in binary form must reproduce the above copyright
     82  *    notice, this list of conditions and the following disclaimer in the
     83  *    documentation and/or other materials provided with the distribution.
     84  * 3. All advertising materials mentioning features or use of this software
     85  *    must display the following acknowledgement:
     86  *	This product includes software developed by the University of
     87  *	California, Berkeley and its contributors.
     88  * 4. Neither the name of the University nor the names of its contributors
     89  *    may be used to endorse or promote products derived from this software
     90  *    without specific prior written permission.
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     93  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     95  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     96  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     97  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     98  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    100  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    101  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    102  * SUCH DAMAGE.
    103  *
    104  *	@(#)ip_icmp.c	8.2 (Berkeley) 1/4/94
    105  */
    106 
    107 #include "opt_ipsec.h"
    108 
    109 #include <sys/param.h>
    110 #include <sys/systm.h>
    111 #include <sys/malloc.h>
    112 #include <sys/mbuf.h>
    113 #include <sys/protosw.h>
    114 #include <sys/socket.h>
    115 #include <sys/time.h>
    116 #include <sys/kernel.h>
    117 #include <sys/proc.h>
    118 
    119 #include <uvm/uvm_extern.h>
    120 
    121 #include <sys/sysctl.h>
    122 
    123 #include <net/if.h>
    124 #include <net/route.h>
    125 
    126 #include <netinet/in.h>
    127 #include <netinet/in_systm.h>
    128 #include <netinet/in_var.h>
    129 #include <netinet/ip.h>
    130 #include <netinet/ip_icmp.h>
    131 #include <netinet/ip_var.h>
    132 #include <netinet/in_pcb.h>
    133 #include <netinet/icmp_var.h>
    134 
    135 #ifdef IPSEC
    136 #include <netinet6/ipsec.h>
    137 #include <netkey/key.h>
    138 #endif
    139 
    140 #include <machine/stdarg.h>
    141 
    142 /*
    143  * ICMP routines: error generation, receive packet processing,
    144  * routines to turn packets around and send them back to the
    145  * originator, and host table maintenance routines.
    146  */
    147 
    148 int	icmpmaskrepl = 0;	/* nonzero: icmp_input answers ICMP_MASKREQ */
    149 #ifdef ICMPPRINTFS
    150 int	icmpprintfs = 0;	/* nonzero: emit the debugging printf()s */
    151 #endif
        /*
         * Upper bound on how many bytes following the offending packet's IP
         * header icmp_error quotes in the error message.  Settable through
         * the ICMPCTL_RETURNDATABYTES case of icmp_sysctl.
         */
    152 int	icmpreturndatabytes = 8;
    153 
    154 /*
    155  * List of callbacks to notify when Path MTU changes are made.
         * Entries are added by icmp_mtudisc_callback_register(); each one
         * holds a single function taking the affected address.
    156  */
    157 struct mtudisc_callback {
    158 	LIST_ENTRY(mtudisc_callback) mc_list;	/* list linkage */
    159 	void (*mc_func) __P((struct in_addr));	/* function to call */
    160 };
    161 
    162 LIST_HEAD(, mtudisc_callback) icmp_mtudisc_callbacks =
    163     LIST_HEAD_INITIALIZER(&icmp_mtudisc_callbacks);
    164 
        /*
         * ip_next_mtu is deliberately left non-static by the #if 0 below;
         * presumably it is referenced from another file -- TODO confirm.
         */
    165 #if 0
    166 static int	ip_next_mtu __P((int, int));
    167 #else
    168 /*static*/ int	ip_next_mtu __P((int, int));
    169 #endif
    170 
        /*
         * icmperrppslim is defined elsewhere and exposed through the
         * ICMPCTL_ERRPPSLIMIT case of icmp_sysctl.  The two statics that
         * follow are presumably the bookkeeping used by icmp_ratelimit(),
         * whose body is outside this part of the file -- TODO confirm.
         */
    171 extern int icmperrppslim;
    172 static int icmperrpps_count = 0;
    173 static struct timeval icmperrppslim_last;
    174 
        /* Timer callback handed to rt_timer_add() by icmp_mtudisc(). */
    175 static void icmp_mtudisc_timeout __P((struct rtentry *, struct rttimer *));
    176 
        /* A nonzero return makes icmp_error drop the error instead of sending. */
    177 static int icmp_ratelimit __P((const struct in_addr *, const int, const int));
    178 
    179 /*
    180  * Register a Path MTU Discovery callback.
    181  */
    182 void
    183 icmp_mtudisc_callback_register(func)
    184 	void (*func) __P((struct in_addr));
    185 {
    186 	struct mtudisc_callback *mc;
    187 
    188 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    189 	     mc = LIST_NEXT(mc, mc_list)) {
    190 		if (mc->mc_func == func)
    191 			return;
    192 	}
    193 
    194 	mc = malloc(sizeof(*mc), M_PCB, M_NOWAIT);
    195 	if (mc == NULL)
    196 		panic("icmp_mtudisc_callback_register");
    197 
    198 	mc->mc_func = func;
    199 	LIST_INSERT_HEAD(&icmp_mtudisc_callbacks, mc, mc_list);
    200 }
    201 
    202 /*
    203  * Generate an error packet of type error
    204  * in response to bad packet ip.
    205  */
    206 void
    207 icmp_error(n, type, code, dest, destifp)
    208 	struct mbuf *n;
    209 	int type, code;
    210 	n_long dest;
    211 	struct ifnet *destifp;
    212 {
    213 	struct ip *oip = mtod(n, struct ip *), *nip;
    214 	unsigned oiplen = oip->ip_hl << 2;
    215 	struct icmp *icp;
    216 	struct mbuf *m;
    217 	unsigned icmplen, mblen;
    218 
    219 #ifdef ICMPPRINTFS
    220 	if (icmpprintfs)
    221 		printf("icmp_error(%x, %d, %d)\n", oip, type, code);
    222 #endif
    223 	if (type != ICMP_REDIRECT)
    224 		icmpstat.icps_error++;
    225 	/*
    226 	 * Don't send error if the original packet was encrypted.
    227 	 * Don't send error if not the first fragment of message.
    228 	 * Don't error if the old packet protocol was ICMP
    229 	 * error message, only known informational types.
    230 	 */
    231 	if (n->m_flags & M_DECRYPTED)
    232 		goto freeit;
    233 	if (oip->ip_off &~ (IP_MF|IP_DF))
    234 		goto freeit;
    235 	if (oip->ip_p == IPPROTO_ICMP && type != ICMP_REDIRECT &&
    236 	  n->m_len >= oiplen + ICMP_MINLEN &&
    237 	  !ICMP_INFOTYPE(((struct icmp *)((caddr_t)oip + oiplen))->icmp_type)) {
    238 		icmpstat.icps_oldicmp++;
    239 		goto freeit;
    240 	}
    241 	/* Don't send error in response to a multicast or broadcast packet */
    242 	if (n->m_flags & (M_BCAST|M_MCAST))
    243 		goto freeit;
    244 
    245 	/*
    246 	 * First, do a rate limitation check.
    247 	 */
    248 	if (icmp_ratelimit(&oip->ip_src, type, code)) {
    249 		/* XXX stat */
    250 		goto freeit;
    251 	}
    252 
    253 	/*
    254 	 * Now, formulate icmp message
    255 	 */
    256 	icmplen = oiplen + min(icmpreturndatabytes, oip->ip_len - oiplen);
    257 	/*
    258 	 * Defend against mbuf chains shorter than oip->ip_len:
    259 	 */
    260 	mblen = 0;
    261 	for (m = n; m && (mblen < icmplen); m = m->m_next)
    262 		mblen += m->m_len;
    263 	icmplen = min(mblen, icmplen);
    264 
    265 	/*
    266 	 * As we are not required to return everything we have,
    267 	 * we return whatever we can return at ease.
    268 	 *
    269 	 * Note that ICMP datagrams longer than 576 octets are out of spec
    270 	 * according to RFC1812; the limit on icmpreturndatabytes below in
    271 	 * icmp_sysctl will keep things below that limit.
    272 	 */
    273 
    274 	KASSERT(ICMP_MINLEN <= MCLBYTES);
    275 
    276 	if (icmplen + ICMP_MINLEN > MCLBYTES)
    277 		icmplen = MCLBYTES - ICMP_MINLEN;
    278 
    279 	m = m_gethdr(M_DONTWAIT, MT_HEADER);
    280 	if (m && (icmplen + ICMP_MINLEN > MHLEN)) {
    281 		MCLGET(m, M_DONTWAIT);
    282 		if ((m->m_flags & M_EXT) == 0) {
    283 			m_freem(m);
    284 			m = NULL;
    285 		}
    286 	}
    287 	if (m == NULL)
    288 		goto freeit;
    289 	m->m_len = icmplen + ICMP_MINLEN;
    290 	if ((m->m_flags & M_EXT) == 0)
    291 		MH_ALIGN(m, m->m_len);
    292 	icp = mtod(m, struct icmp *);
    293 	if ((u_int)type > ICMP_MAXTYPE)
    294 		panic("icmp_error");
    295 	icmpstat.icps_outhist[type]++;
    296 	icp->icmp_type = type;
    297 	if (type == ICMP_REDIRECT)
    298 		icp->icmp_gwaddr.s_addr = dest;
    299 	else {
    300 		icp->icmp_void = 0;
    301 		/*
    302 		 * The following assignments assume an overlay with the
    303 		 * zeroed icmp_void field.
    304 		 */
    305 		if (type == ICMP_PARAMPROB) {
    306 			icp->icmp_pptr = code;
    307 			code = 0;
    308 		} else if (type == ICMP_UNREACH &&
    309 		    code == ICMP_UNREACH_NEEDFRAG && destifp)
    310 			icp->icmp_nextmtu = htons(destifp->if_mtu);
    311 	}
    312 
    313 	HTONS(oip->ip_off);
    314 	HTONS(oip->ip_len);
    315 	icp->icmp_code = code;
    316 	m_copydata(n, 0, icmplen, (caddr_t)&icp->icmp_ip);
    317 	nip = &icp->icmp_ip;
    318 
    319 	/*
    320 	 * Now, copy old ip header (without options)
    321 	 * in front of icmp message.
    322 	 */
    323 	if (m->m_data - sizeof(struct ip) < m->m_pktdat)
    324 		panic("icmp len");
    325 	m->m_data -= sizeof(struct ip);
    326 	m->m_len += sizeof(struct ip);
    327 	m->m_pkthdr.len = m->m_len;
    328 	m->m_pkthdr.rcvif = n->m_pkthdr.rcvif;
    329 	nip = mtod(m, struct ip *);
    330 	/* ip_v set in ip_output */
    331 	nip->ip_hl = sizeof(struct ip) >> 2;
    332 	nip->ip_tos = 0;
    333 	nip->ip_len = m->m_len;
    334 	/* ip_id set in ip_output */
    335 	nip->ip_off = 0;
    336 	/* ip_ttl set in icmp_reflect */
    337 	nip->ip_p = IPPROTO_ICMP;
    338 	nip->ip_src = oip->ip_src;
    339 	nip->ip_dst = oip->ip_dst;
    340 	icmp_reflect(m);
    341 
    342 freeit:
    343 	m_freem(n);
    344 }
    345 
    346 static struct sockaddr_in icmpsrc = { sizeof (struct sockaddr_in), AF_INET };	/* scratch: destination quoted in an error/redirect */
        /* scratch: mask-request lookup address, redirect gateway, reflected destination */
    347 static struct sockaddr_in icmpdst = { sizeof (struct sockaddr_in), AF_INET };
        /* scratch: source of a received redirect (set in icmp_input) */
    348 static struct sockaddr_in icmpgw = { sizeof (struct sockaddr_in), AF_INET };
        /* Not referenced in the visible part of this file; presumably used by other code -- TODO confirm. */
    349 struct sockaddr_in icmpmask = { 8, 0 };
    350 
    351 /*
    352  * Process a received ICMP message.
         *
         * Besides m, two int arguments are fetched through varargs:
         *	hlen  - length in bytes of the IP header at the front of m
         *	proto - handed unchanged to rip_input()
         *
         * ip->ip_len is used as a host-order value here.
         *
         * The mbuf is disposed of on every path: it is freed at "freeit",
         * handed to icmp_reflect() when a reply is built in place, or
         * passed on to rip_input() at "raw".  When m_pullup() fails the
         * function returns without freeing; presumably m_pullup() has
         * already released the chain -- confirm against mbuf(9).
         *
         * Note that "deliver", "badcode" and "reflect" are labels inside
         * the switch body and are entered by goto from several cases.
    353  */
    354 void
    355 #if __STDC__
    356 icmp_input(struct mbuf *m, ...)
    357 #else
    358 icmp_input(m, va_alist)
    359 	struct mbuf *m;
    360 	va_dcl
    361 #endif
    362 {
    363 	int proto;
    364 	struct icmp *icp;
    365 	struct ip *ip = mtod(m, struct ip *);
    366 	int icmplen;
    367 	int i;
    368 	struct in_ifaddr *ia;
    369 	void *(*ctlfunc) __P((int, struct sockaddr *, void *));
    370 	int code;
    371 	int hlen;
    372 	va_list ap;
    373 
    374 	va_start(ap, m);
    375 	hlen = va_arg(ap, int);
    376 	proto = va_arg(ap, int);
    377 	va_end(ap);
    378 
    379 	/*
    380 	 * Locate icmp structure in mbuf, and check
    381 	 * that not corrupted and of at least minimum length.
    382 	 */
    383 	icmplen = ip->ip_len - hlen;
    384 #ifdef ICMPPRINTFS
    385 	if (icmpprintfs)
    386 		printf("icmp_input from %x to %x, len %d\n",
    387 		    ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr),
    388 		    icmplen);
    389 #endif
    390 	if (icmplen < ICMP_MINLEN) {
    391 		icmpstat.icps_tooshort++;
    392 		goto freeit;
    393 	}
        	/* Make the IP header plus at most ICMP_ADVLENMIN bytes contiguous. */
    394 	i = hlen + min(icmplen, ICMP_ADVLENMIN);
    395 	if (m->m_len < i && (m = m_pullup(m, i)) == 0) {
    396 		icmpstat.icps_tooshort++;
    397 		return;
    398 	}
        	/* m may have been replaced by m_pullup(); reload the header pointer. */
    399 	ip = mtod(m, struct ip *);
        	/*
        	 * Temporarily step past the IP header so that in_cksum() covers
        	 * only the ICMP message; the adjustment is undone right after.
        	 */
    400 	m->m_len -= hlen;
    401 	m->m_data += hlen;
    402 	icp = mtod(m, struct icmp *);
    403 	if (in_cksum(m, icmplen)) {
    404 		icmpstat.icps_checksum++;
    405 		goto freeit;
    406 	}
    407 	m->m_len += hlen;
    408 	m->m_data -= hlen;
    409 
    410 #ifdef ICMPPRINTFS
    411 	/*
    412 	 * Message type specific processing.
    413 	 */
    414 	if (icmpprintfs)
    415 		printf("icmp_input, type %d code %d\n", icp->icmp_type,
    416 		    icp->icmp_code);
    417 #endif
    418 #ifdef IPSEC
    419 	/* drop it if it does not match the policy */
    420 	if (ipsec4_in_reject(m, NULL)) {
    421 		ipsecstat.in_polvio++;
    422 		goto freeit;
    423 	}
    424 #endif
        	/*
        	 * Types above ICMP_MAXTYPE skip the icps_inhist[] update below
        	 * and go straight to the raw listeners.
        	 */
    425 	if (icp->icmp_type > ICMP_MAXTYPE)
    426 		goto raw;
    427 	icmpstat.icps_inhist[icp->icmp_type]++;
    428 	code = icp->icmp_code;
    429 	switch (icp->icmp_type) {
    430 
        	/*
        	 * For the error types below, "code" is rewritten from the ICMP
        	 * code into the PRC_* value passed to the protocol's
        	 * pr_ctlinput routine at "deliver".
        	 */
    431 	case ICMP_UNREACH:
    432 		switch (code) {
    433 			case ICMP_UNREACH_NET:
    434 			case ICMP_UNREACH_HOST:
    435 			case ICMP_UNREACH_PROTOCOL:
    436 			case ICMP_UNREACH_PORT:
    437 			case ICMP_UNREACH_SRCFAIL:
    438 				code += PRC_UNREACH_NET;
    439 				break;
    440 
    441 			case ICMP_UNREACH_NEEDFRAG:
    442 				code = PRC_MSGSIZE;
    443 				break;
    444 
    445 			case ICMP_UNREACH_NET_UNKNOWN:
    446 			case ICMP_UNREACH_NET_PROHIB:
    447 			case ICMP_UNREACH_TOSNET:
    448 				code = PRC_UNREACH_NET;
    449 				break;
    450 
    451 			case ICMP_UNREACH_HOST_UNKNOWN:
    452 			case ICMP_UNREACH_ISOLATED:
    453 			case ICMP_UNREACH_HOST_PROHIB:
    454 			case ICMP_UNREACH_TOSHOST:
    455 				code = PRC_UNREACH_HOST;
    456 				break;
    457 
    458 			default:
    459 				goto badcode;
    460 		}
    461 		goto deliver;
    462 
    463 	case ICMP_TIMXCEED:
    464 		if (code > 1)
    465 			goto badcode;
    466 		code += PRC_TIMXCEED_INTRANS;
    467 		goto deliver;
    468 
    469 	case ICMP_PARAMPROB:
    470 		if (code > 1)
    471 			goto badcode;
    472 		code = PRC_PARAMPROB;
    473 		goto deliver;
    474 
    475 	case ICMP_SOURCEQUENCH:
    476 		if (code)
    477 			goto badcode;
    478 		code = PRC_QUENCH;
    479 		goto deliver;
    480 
    481 	deliver:
    482 		/*
    483 		 * Problem with datagram; advise higher level routines.
        		 *
        		 * NOTE(review): the length tests below are made against
        		 * icmplen, while only hlen + min(icmplen, ICMP_ADVLENMIN)
        		 * bytes were made contiguous above -- confirm that the
        		 * pr_ctlinput handlers do not read further into the quoted
        		 * packet without checking the mbuf length themselves.
    484 		 */
    485 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    486 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    487 			icmpstat.icps_badlen++;
    488 			goto freeit;
    489 		}
    490 		if (IN_MULTICAST(icp->icmp_ip.ip_dst.s_addr))
    491 			goto badcode;
        		/* Quoted header length is converted to host order in place. */
    492 		NTOHS(icp->icmp_ip.ip_len);
    493 #ifdef ICMPPRINTFS
    494 		if (icmpprintfs)
    495 			printf("deliver to protocol %d\n", icp->icmp_ip.ip_p);
    496 #endif
        		/*
        		 * Notify the protocol named in the quoted header, passing
        		 * the quoted destination and a pointer to the quoted header.
        		 * After the break the message still goes to rip_input().
        		 */
    497 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    498 		ctlfunc = inetsw[ip_protox[icp->icmp_ip.ip_p]].pr_ctlinput;
    499 		if (ctlfunc)
    500 			(void) (*ctlfunc)(code, sintosa(&icmpsrc),
    501 			    &icp->icmp_ip);
    502 		break;
    503 
    504 	badcode:
    505 		icmpstat.icps_badcode++;
    506 		break;
    507 
        	/* Requests are answered by rewriting the message in place. */
    508 	case ICMP_ECHO:
    509 		icp->icmp_type = ICMP_ECHOREPLY;
    510 		goto reflect;
    511 
    512 	case ICMP_TSTAMP:
    513 		if (icmplen < ICMP_TSLEN) {
    514 			icmpstat.icps_badlen++;
    515 			break;
    516 		}
    517 		icp->icmp_type = ICMP_TSTAMPREPLY;
    518 		icp->icmp_rtime = iptime();
    519 		icp->icmp_ttime = icp->icmp_rtime;	/* bogus, do later! */
    520 		goto reflect;
    521 
    522 	case ICMP_MASKREQ:
        		/* Mask replies are only sent when enabled via icmpmaskrepl. */
    523 		if (icmpmaskrepl == 0)
    524 			break;
    525 		/*
    526 		 * We are not able to respond with all ones broadcast
    527 		 * unless we receive it over a point-to-point interface.
    528 		 */
    529 		if (icmplen < ICMP_MASKLEN) {
    530 			icmpstat.icps_badlen++;
    531 			break;
    532 		}
    533 		if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    534 		    in_nullhost(ip->ip_dst))
    535 			icmpdst.sin_addr = ip->ip_src;
    536 		else
    537 			icmpdst.sin_addr = ip->ip_dst;
        		/* Find an address on the receiving interface matching icmpdst. */
    538 		ia = ifatoia(ifaof_ifpforaddr(sintosa(&icmpdst),
    539 		    m->m_pkthdr.rcvif));
    540 		if (ia == 0)
    541 			break;
    542 		icp->icmp_type = ICMP_MASKREPLY;
    543 		icp->icmp_mask = ia->ia_sockmask.sin_addr.s_addr;
        		/*
        		 * A request from the null address is answered to the
        		 * interface's broadcast or point-to-point peer address
        		 * (ip_src becomes the reply destination in icmp_reflect).
        		 */
    544 		if (in_nullhost(ip->ip_src)) {
    545 			if (ia->ia_ifp->if_flags & IFF_BROADCAST)
    546 				ip->ip_src = ia->ia_broadaddr.sin_addr;
    547 			else if (ia->ia_ifp->if_flags & IFF_POINTOPOINT)
    548 				ip->ip_src = ia->ia_dstaddr.sin_addr;
    549 		}
    550 reflect:
        		/* Reply was built in place; count it and send m back. */
    551 		icmpstat.icps_reflect++;
    552 		icmpstat.icps_outhist[icp->icmp_type]++;
    553 		icmp_reflect(m);
    554 		return;
    555 
    556 	case ICMP_REDIRECT:
    557 		if (code > 3)
    558 			goto badcode;
    559 		if (icmplen < ICMP_ADVLENMIN || icmplen < ICMP_ADVLEN(icp) ||
    560 		    icp->icmp_ip.ip_hl < (sizeof(struct ip) >> 2)) {
    561 			icmpstat.icps_badlen++;
    562 			break;
    563 		}
    564 		/*
    565 		 * Short circuit routing redirects to force
    566 		 * immediate change in the kernel's routing
    567 		 * tables.  The message is also handed to anyone
    568 		 * listening on a raw socket (e.g. the routing
    569 		 * daemon for use in updating its tables).
    570 		 */
    571 		icmpgw.sin_addr = ip->ip_src;
    572 		icmpdst.sin_addr = icp->icmp_gwaddr;
    573 #ifdef	ICMPPRINTFS
    574 		if (icmpprintfs)
    575 			printf("redirect dst %x to %x\n", icp->icmp_ip.ip_dst,
    576 			    icp->icmp_gwaddr);
    577 #endif
    578 		icmpsrc.sin_addr = icp->icmp_ip.ip_dst;
    579 		rtredirect(sintosa(&icmpsrc), sintosa(&icmpdst),
    580 		    (struct sockaddr *)0, RTF_GATEWAY | RTF_HOST,
    581 		    sintosa(&icmpgw), (struct rtentry **)0);
    582 		pfctlinput(PRC_REDIRECT_HOST, sintosa(&icmpsrc));
    583 #ifdef IPSEC
    584 		key_sa_routechange((struct sockaddr *)&icmpsrc);
    585 #endif
    586 		break;
    587 
    588 	/*
    589 	 * No kernel processing for the following;
    590 	 * just fall through to send to raw listener.
    591 	 */
    592 	case ICMP_ECHOREPLY:
    593 	case ICMP_ROUTERADVERT:
    594 	case ICMP_ROUTERSOLICIT:
    595 	case ICMP_TSTAMPREPLY:
    596 	case ICMP_IREQREPLY:
    597 	case ICMP_MASKREPLY:
    598 	default:
    599 		break;
    600 	}
    601 
    602 raw:
        	/* Everything that was not reflected or dropped goes to raw sockets. */
    603 	rip_input(m, hlen, proto);
    604 	return;
    605 
    606 freeit:
    607 	m_freem(m);
    608 	return;
    609 }
    610 
    611 /*
    612  * Reflect the ip packet back to the source
         *
         * m starts with an IP header followed by the ICMP message to send.
         * The header is turned around in place: the old source becomes the
         * destination, a local address is chosen as the new source (see the
         * cascade below) and the TTL is set to MAXTTL.  IP options are
         * stripped from the header; record-route, timestamp and security
         * options, plus any saved source route, are collected into a
         * separate mbuf that is passed to icmp_send() and freed here
         * afterwards.
         *
         * m is always disposed of: it is freed when the return address is
         * unusable or no source address can be found, otherwise it is
         * handed to icmp_send().
         *
         * NOTE(review): m->m_pkthdr.rcvif is dereferenced several times
         * without a NULL check -- confirm that every caller supplies a
         * packet with rcvif set.
    613  */
    614 void
    615 icmp_reflect(m)
    616 	struct mbuf *m;
    617 {
    618 	struct ip *ip = mtod(m, struct ip *);
    619 	struct in_ifaddr *ia;
    620 	struct ifaddr *ifa;
    621 	struct sockaddr_in *sin = 0;
    622 	struct in_addr t;
    623 	struct mbuf *opts = 0;
    624 	int optlen = (ip->ip_hl << 2) - sizeof(struct ip);
    625 
        	/*
        	 * Refuse to reply to a source that in_canforward() rejects,
        	 * except for addresses in the loopback network.
        	 */
    626 	if (!in_canforward(ip->ip_src) &&
    627 	    ((ip->ip_src.s_addr & IN_CLASSA_NET) !=
    628 	     htonl(IN_LOOPBACKNET << IN_CLASSA_NSHIFT))) {
    629 		m_freem(m);	/* Bad return address */
    630 		goto done;	/* ip_output() will check for broadcast */
    631 	}
        	/* t keeps the original destination for the lookups below. */
    632 	t = ip->ip_dst;
    633 	ip->ip_dst = ip->ip_src;
    634 	/*
    635 	 * If the incoming packet was addressed directly to us, use
    636 	 * dst as the src for the reply.  Otherwise (broadcast or
    637 	 * anonymous), use an address which corresponds to the
    638 	 * incoming interface, with a preference for the address which
    639 	 * corresponds to the route to the destination of the ICMP.
    640 	 */
    641 
    642 	/* Look for packet addressed to us */
    643 	INADDR_TO_IA(t, ia);
    644 
    645 	/* look for packet sent to broadcast address */
    646 	if (ia == NULL && (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST)) {
    647 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    648 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    649 			if (ifa->ifa_addr->sa_family != AF_INET)
    650 				continue;
    651 			if (in_hosteq(t,ifatoia(ifa)->ia_broadaddr.sin_addr)) {
    652 				ia = ifatoia(ifa);
    653 				break;
    654 			}
    655 		}
    656 	}
    657 
    658 	if (ia)
    659 		sin = &ia->ia_addr;
    660 
    661 	icmpdst.sin_addr = t;
    662 
    663 	/* if the packet is addressed somewhere else, compute the
    664 	   source address for packets routed back to the source, and
    665 	   use that, if it's an address on the interface which
    666 	   received the packet */
    667 	if (sin == (struct sockaddr_in *)0) {
    668 		struct sockaddr_in sin_dst;
    669 		struct route icmproute;
    670 		int errornum;
    671 
    672 		sin_dst.sin_family = AF_INET;
    673 		sin_dst.sin_len = sizeof(struct sockaddr_in);
    674 		sin_dst.sin_addr = ip->ip_dst;
    675 		bzero(&icmproute, sizeof(icmproute));
    676 		errornum = 0;
    677 		sin = in_selectsrc(&sin_dst, &icmproute, 0, NULL, &errornum);
    678 		/* errornum is never used */
        		/* The temporary route is only needed for the lookup. */
    679 		if (icmproute.ro_rt)
    680 			RTFREE(icmproute.ro_rt);
    681 		/* check to make sure sin is a source address on rcvif */
    682 		if (sin) {
    683 			t = sin->sin_addr;
    684 			sin = (struct sockaddr_in *)0;
    685 			INADDR_TO_IA(t, ia);
    686 			while (ia) {
    687 				if (ia->ia_ifp == m->m_pkthdr.rcvif) {
    688 					sin = &ia->ia_addr;
    689 					break;
    690 				}
    691 				NEXT_IA_WITH_SAME_ADDR(ia);
    692 			}
    693 		}
    694 	}
    695 
    696 	/* if it was not addressed to us, but the route doesn't go out
    697 	   the source interface, pick an address on the source
    698 	   interface.  This can happen when routing is asymmetric, or
    699 	   when the incoming packet was encapsulated */
    700 	if (sin == (struct sockaddr_in *)0) {
    701 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    702 		     ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    703 			if (ifa->ifa_addr->sa_family != AF_INET)
    704 				continue;
    705 			sin = &(ifatoia(ifa)->ia_addr);
    706 			break;
    707 		}
    708 	}
    709 
    710 	/*
    711 	 * The following happens if the packet was not addressed to us,
    712 	 * and was received on an interface with no IP address:
    713 	 * We find the first AF_INET address on the first non-loopback
    714 	 * interface.
    715 	 */
    716 	if (sin == (struct sockaddr_in *)0)
    717 		for (ia = in_ifaddr.tqh_first; ia != NULL;
    718 		    ia = ia->ia_list.tqe_next) {
    719 			if (ia->ia_ifp->if_flags & IFF_LOOPBACK)
    720 				continue;
    721 			sin = &ia->ia_addr;
    722 			break;
    723 		}
    724 
    725 	/*
    726 	 * If we still didn't find an address, punt.  We could have an
    727 	 * interface up (and receiving packets) with no address.
    728 	 */
    729 	if (sin == (struct sockaddr_in *)0) {
    730 		m_freem(m);
    731 		goto done;
    732 	}
    733 
    734 	ip->ip_src = sin->sin_addr;
    735 	ip->ip_ttl = MAXTTL;
    736 
    737 	if (optlen > 0) {
    738 		u_char *cp;
    739 		int opt, cnt;
    740 		u_int len;
    741 
    742 		/*
    743 		 * Retrieve any source routing from the incoming packet;
    744 		 * add on any record-route or timestamp options.
    745 		 */
    746 		cp = (u_char *) (ip + 1);
        		/*
        		 * If no source route was saved, start a fresh options mbuf
        		 * whose first sizeof(struct in_addr) bytes are a zero
        		 * address.  If that allocation fails too, opts stays NULL
        		 * and the options are simply dropped.
        		 */
    747 		if ((opts = ip_srcroute()) == 0 &&
    748 		    (opts = m_gethdr(M_DONTWAIT, MT_HEADER))) {
    749 			opts->m_len = sizeof(struct in_addr);
    750 			*mtod(opts, struct in_addr *) = zeroin_addr;
    751 		}
    752 		if (opts) {
    753 #ifdef ICMPPRINTFS
    754 		    if (icmpprintfs)
    755 			    printf("icmp_reflect optlen %d rt %d => ",
    756 				optlen, opts->m_len);
    757 #endif
        		    /*
        		     * Walk the received options.  The walk stops at
        		     * end-of-list or at the first option whose length
        		     * byte is missing, too small, or larger than the
        		     * remaining option space.
        		     */
    758 		    for (cnt = optlen; cnt > 0; cnt -= len, cp += len) {
    759 			    opt = cp[IPOPT_OPTVAL];
    760 			    if (opt == IPOPT_EOL)
    761 				    break;
    762 			    if (opt == IPOPT_NOP)
    763 				    len = 1;
    764 			    else {
    765 				    if (cnt < IPOPT_OLEN + sizeof(*cp))
    766 					    break;
    767 				    len = cp[IPOPT_OLEN];
    768 				    if (len < IPOPT_OLEN + sizeof(*cp) ||
    769 				        len > cnt)
    770 					    break;
    771 			    }
    772 			    /*
    773 			     * Should check for overflow, but it "can't happen"
    774 			     */
    775 			    if (opt == IPOPT_RR || opt == IPOPT_TS ||
    776 				opt == IPOPT_SECURITY) {
    777 				    bcopy((caddr_t)cp,
    778 					mtod(opts, caddr_t) + opts->m_len, len);
    779 				    opts->m_len += len;
    780 			    }
    781 		    }
    782 		    /* Terminate & pad, if necessary */
    783 		    if ((cnt = opts->m_len % 4) != 0) {
    784 			    for (; cnt < 4; cnt++) {
    785 				    *(mtod(opts, caddr_t) + opts->m_len) =
    786 					IPOPT_EOL;
    787 				    opts->m_len++;
    788 			    }
    789 		    }
    790 #ifdef ICMPPRINTFS
    791 		    if (icmpprintfs)
    792 			    printf("%d\n", opts->m_len);
    793 #endif
    794 		}
    795 		/*
    796 		 * Now strip out original options by copying rest of first
    797 		 * mbuf's data back, and adjust the IP length.
    798 		 */
    799 		ip->ip_len -= optlen;
    800 		ip->ip_hl = sizeof(struct ip) >> 2;
    801 		m->m_len -= optlen;
    802 		if (m->m_flags & M_PKTHDR)
    803 			m->m_pkthdr.len -= optlen;
        		/* From here on optlen is the full length of the old header. */
    804 		optlen += sizeof(struct ip);
    805 		bcopy((caddr_t)ip + optlen, (caddr_t)(ip + 1),
    806 			 (unsigned)(m->m_len - sizeof(struct ip)));
    807 	}
        	/* The reply is unicast whatever the request's link-level type was. */
    808 	m->m_flags &= ~(M_BCAST|M_MCAST);
    809 	icmp_send(m, opts);
    810 done:
        	/* Only the first mbuf of opts is released (m_free, not m_freem). */
    811 	if (opts)
    812 		(void)m_free(opts);
    813 }
    814 
    815 /*
    816  * Send an icmp packet back to the ip level,
    817  * after supplying a checksum.
    818  */
    819 void
    820 icmp_send(m, opts)
    821 	struct mbuf *m;
    822 	struct mbuf *opts;
    823 {
    824 	struct ip *ip = mtod(m, struct ip *);
    825 	int hlen;
    826 	struct icmp *icp;
    827 
    828 	hlen = ip->ip_hl << 2;
    829 	m->m_data += hlen;
    830 	m->m_len -= hlen;
    831 	icp = mtod(m, struct icmp *);
    832 	icp->icmp_cksum = 0;
    833 	icp->icmp_cksum = in_cksum(m, ip->ip_len - hlen);
    834 	m->m_data -= hlen;
    835 	m->m_len += hlen;
    836 #ifdef ICMPPRINTFS
    837 	if (icmpprintfs)
    838 		printf("icmp_send dst %x src %x\n", ip->ip_dst, ip->ip_src);
    839 #endif
    840 #ifdef IPSEC
    841 	/* Don't lookup socket */
    842 	ipsec_setsocket(m, NULL);
    843 #endif
    844 	(void) ip_output(m, opts, NULL, 0, NULL);
    845 }
    846 
    847 n_time
    848 iptime()
    849 {
    850 	struct timeval atv;
    851 	u_long t;
    852 
    853 	microtime(&atv);
    854 	t = (atv.tv_sec % (24*60*60)) * 1000 + atv.tv_usec / 1000;
    855 	return (htonl(t));
    856 }
    857 
    858 int
    859 icmp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
    860 	int *name;
    861 	u_int namelen;
    862 	void *oldp;
    863 	size_t *oldlenp;
    864 	void *newp;
    865 	size_t newlen;
    866 {
    867 	int arg, error;
    868 
    869 	/* All sysctl names at this level are terminal. */
    870 	if (namelen != 1)
    871 		return (ENOTDIR);
    872 
    873 	switch (name[0])
    874 	{
    875 	case ICMPCTL_MASKREPL:
    876 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmpmaskrepl);
    877 		break;
    878 	case ICMPCTL_RETURNDATABYTES:
    879 		arg = icmpreturndatabytes;
    880 		error = sysctl_int(oldp, oldlenp, newp, newlen, &arg);
    881 		if (error)
    882 			break;
    883 		if ((arg >= 8) || (arg <= 512))
    884 			icmpreturndatabytes = arg;
    885 		else
    886 			error = EINVAL;
    887 		break;
    888 	case ICMPCTL_ERRPPSLIMIT:
    889 		error = sysctl_int(oldp, oldlenp, newp, newlen, &icmperrppslim);
    890 		break;
    891 	default:
    892 		error = ENOPROTOOPT;
    893 		break;
    894 	}
    895 	return error;
    896 }
    897 
    898 void
    899 icmp_mtudisc(icp, faddr)
    900 	struct icmp *icp;
    901 	struct in_addr faddr;
    902 {
    903 	struct mtudisc_callback *mc;
    904 	struct sockaddr *dst = sintosa(&icmpsrc);
    905 	struct rtentry *rt;
    906 	u_long mtu = ntohs(icp->icmp_nextmtu);  /* Why a long?  IPv6 */
    907 	int    error;
    908 
    909 	/* Table of common MTUs: */
    910 
    911 	static u_long mtu_table[] = {65535, 65280, 32000, 17914, 9180, 8166,
    912 				     4352, 2002, 1492, 1006, 508, 296, 68, 0};
    913 
    914 	rt = rtalloc1(dst, 1);
    915 	if (rt == 0)
    916 		return;
    917 
    918 	/* If we didn't get a host route, allocate one */
    919 
    920 	if ((rt->rt_flags & RTF_HOST) == 0) {
    921 		struct rtentry *nrt;
    922 
    923 		error = rtrequest((int) RTM_ADD, dst,
    924 		    (struct sockaddr *) rt->rt_gateway,
    925 		    (struct sockaddr *) 0,
    926 		    RTF_GATEWAY | RTF_HOST | RTF_DYNAMIC, &nrt);
    927 		if (error) {
    928 			rtfree(rt);
    929 			rtfree(nrt);
    930 			return;
    931 		}
    932 		nrt->rt_rmx = rt->rt_rmx;
    933 		rtfree(rt);
    934 		rt = nrt;
    935 	}
    936 	error = rt_timer_add(rt, icmp_mtudisc_timeout, ip_mtudisc_timeout_q);
    937 	if (error) {
    938 		rtfree(rt);
    939 		return;
    940 	}
    941 
    942 	if (mtu == 0) {
    943 		int i = 0;
    944 
    945 		mtu = icp->icmp_ip.ip_len; /* NTOHS happened in deliver: */
    946 		/* Some 4.2BSD-based routers incorrectly adjust the ip_len */
    947 		if (mtu > rt->rt_rmx.rmx_mtu && rt->rt_rmx.rmx_mtu != 0)
    948 			mtu -= (icp->icmp_ip.ip_hl << 2);
    949 
    950 		/* If we still can't guess a value, try the route */
    951 
    952 		if (mtu == 0) {
    953 			mtu = rt->rt_rmx.rmx_mtu;
    954 
    955 			/* If no route mtu, default to the interface mtu */
    956 
    957 			if (mtu == 0)
    958 				mtu = rt->rt_ifp->if_mtu;
    959 		}
    960 
    961 		for (i = 0; i < sizeof(mtu_table) / sizeof(mtu_table[0]); i++)
    962 			if (mtu > mtu_table[i]) {
    963 				mtu = mtu_table[i];
    964 				break;
    965 			}
    966 	}
    967 
    968 	/*
    969 	 * XXX:   RTV_MTU is overloaded, since the admin can set it
    970 	 *	  to turn off PMTU for a route, and the kernel can
    971 	 *	  set it to indicate a serious problem with PMTU
    972 	 *	  on a route.  We should be using a separate flag
    973 	 *	  for the kernel to indicate this.
    974 	 */
    975 
    976 	if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
    977 		if (mtu < 296 || mtu > rt->rt_ifp->if_mtu)
    978 			rt->rt_rmx.rmx_locks |= RTV_MTU;
    979 		else if (rt->rt_rmx.rmx_mtu > mtu ||
    980 			 rt->rt_rmx.rmx_mtu == 0)
    981 			rt->rt_rmx.rmx_mtu = mtu;
    982 	}
    983 
    984 	if (rt)
    985 		rtfree(rt);
    986 
    987 	/*
    988 	 * Notify protocols that the MTU for this destination
    989 	 * has changed.
    990 	 */
    991 	for (mc = LIST_FIRST(&icmp_mtudisc_callbacks); mc != NULL;
    992 	     mc = LIST_NEXT(mc, mc_list))
    993 		(*mc->mc_func)(faddr);
    994 }
    995 
/*
 * Return the next larger or smaller MTU plateau (table from RFC 1191)
 * given current value MTU.  If DIR is less than zero, a larger plateau
 * is returned; otherwise, a smaller value is returned.
 *
 * When MTU lies strictly between two plateaus and a smaller value is
 * requested, the plateau just below MTU is returned.
 *
 * Returns 0 when no such plateau exists, and also when MTU is negative.
 */
int
ip_next_mtu(mtu, dir)	/* XXX */
	int mtu;
	int dir;
{
	/* Plateaus in descending order, terminated by 0. */
	static const int mtutab[] = {
		65535, 32000, 17914, 8166, 4352, 2002, 1492, 1006, 508, 296,
		68, 0
	};
	const int ntab = (int)((sizeof mtutab) / (sizeof mtutab[0]));
	int i;

	/*
	 * A negative MTU matches no table entry, which would leave the
	 * index one past the end of mtutab[] and make the lookups below
	 * read outside the array.  There is no plateau to report.
	 */
	if (mtu < 0)
		return 0;

	/*
	 * Find the largest plateau that does not exceed mtu.  Because
	 * mtu >= 0, the terminating 0 entry always stops the search.
	 */
	for (i = 0; i < ntab; i++) {
		if (mtu >= mtutab[i])
			break;
	}

	if (dir < 0) {
		/* Larger plateau: the entry before i, if there is one. */
		if (i == 0) {
			return 0;
		} else {
			return mtutab[i - 1];
		}
	} else {
		if (mtutab[i] == 0) {
			/* Already at the terminator: nothing smaller. */
			return 0;
		} else if (mtu > mtutab[i]) {
			/* mtu is between plateaus; mtutab[i] is just below. */
			return mtutab[i];
		} else {
			/* mtu is exactly a plateau; step to the next one. */
			return mtutab[i + 1];
		}
	}
}
   1033 
   1034 static void
   1035 icmp_mtudisc_timeout(rt, r)
   1036 	struct rtentry *rt;
   1037 	struct rttimer *r;
   1038 {
   1039 	if (rt == NULL)
   1040 		panic("icmp_mtudisc_timeout:  bad route to timeout");
   1041 	if ((rt->rt_flags & (RTF_DYNAMIC | RTF_HOST)) ==
   1042 	    (RTF_DYNAMIC | RTF_HOST)) {
   1043 		rtrequest((int) RTM_DELETE, (struct sockaddr *)rt_key(rt),
   1044 		    rt->rt_gateway, rt_mask(rt), rt->rt_flags, 0);
   1045 	} else {
   1046 		if ((rt->rt_rmx.rmx_locks & RTV_MTU) == 0) {
   1047 			rt->rt_rmx.rmx_mtu = 0;
   1048 		}
   1049 	}
   1050 }
   1051 
   1052 /*
   1053  * Perform rate limit check.
   1054  * Returns 0 if it is okay to send the icmp packet.
   1055  * Returns 1 if the router SHOULD NOT send this icmp packet due to rate
   1056  * limitation.
   1057  *
   1058  * XXX per-destination/type check necessary?
   1059  */
   1060 static int
   1061 icmp_ratelimit(dst, type, code)
   1062 	const struct in_addr *dst;
   1063 	const int type;			/* not used at this moment */
   1064 	const int code;			/* not used at this moment */
   1065 {
   1066 
   1067 	/* PPS limit */
   1068 	if (!ppsratecheck(&icmperrppslim_last, &icmperrpps_count,
   1069 	    icmperrppslim)) {
   1070 		/* The packet is subject to rate limit */
   1071 		return 1;
   1072 	}
   1073 
   1074 	/*okay to send*/
   1075 	return 0;
   1076 }
   1077