Home | History | Annotate | Line # | Download | only in netinet
ip_input.c revision 1.139
      1  1.139      matt /*	$NetBSD: ip_input.c,v 1.139 2001/11/04 13:42:27 matt Exp $	*/
      2   1.89    itojun 
      3   1.89    itojun /*
      4   1.89    itojun  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5   1.89    itojun  * All rights reserved.
      6   1.89    itojun  *
      7   1.89    itojun  * Redistribution and use in source and binary forms, with or without
      8   1.89    itojun  * modification, are permitted provided that the following conditions
      9   1.89    itojun  * are met:
     10   1.89    itojun  * 1. Redistributions of source code must retain the above copyright
     11   1.89    itojun  *    notice, this list of conditions and the following disclaimer.
     12   1.89    itojun  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.89    itojun  *    notice, this list of conditions and the following disclaimer in the
     14   1.89    itojun  *    documentation and/or other materials provided with the distribution.
     15   1.89    itojun  * 3. Neither the name of the project nor the names of its contributors
     16   1.89    itojun  *    may be used to endorse or promote products derived from this software
     17   1.89    itojun  *    without specific prior written permission.
     18   1.89    itojun  *
     19   1.89    itojun  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20   1.89    itojun  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21   1.89    itojun  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22   1.89    itojun  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23   1.89    itojun  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24   1.89    itojun  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25   1.89    itojun  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26   1.89    itojun  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27   1.89    itojun  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28   1.89    itojun  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29   1.89    itojun  * SUCH DAMAGE.
     30   1.89    itojun  */
     31   1.76   thorpej 
     32   1.76   thorpej /*-
     33   1.76   thorpej  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34   1.76   thorpej  * All rights reserved.
     35   1.76   thorpej  *
     36   1.76   thorpej  * This code is derived from software contributed to The NetBSD Foundation
     37   1.76   thorpej  * by Public Access Networks Corporation ("Panix").  It was developed under
     38   1.76   thorpej  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39   1.76   thorpej  *
     40   1.76   thorpej  * Redistribution and use in source and binary forms, with or without
     41   1.76   thorpej  * modification, are permitted provided that the following conditions
     42   1.76   thorpej  * are met:
     43   1.76   thorpej  * 1. Redistributions of source code must retain the above copyright
     44   1.76   thorpej  *    notice, this list of conditions and the following disclaimer.
     45   1.76   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     46   1.76   thorpej  *    notice, this list of conditions and the following disclaimer in the
     47   1.76   thorpej  *    documentation and/or other materials provided with the distribution.
     48   1.76   thorpej  * 3. All advertising materials mentioning features or use of this software
     49   1.76   thorpej  *    must display the following acknowledgement:
     50   1.76   thorpej  *	This product includes software developed by the NetBSD
     51   1.76   thorpej  *	Foundation, Inc. and its contributors.
     52   1.76   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53   1.76   thorpej  *    contributors may be used to endorse or promote products derived
     54   1.76   thorpej  *    from this software without specific prior written permission.
     55   1.76   thorpej  *
     56   1.76   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57   1.76   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58   1.76   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59   1.76   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60   1.76   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61   1.76   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62   1.76   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63   1.76   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64   1.76   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65   1.76   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66   1.76   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     67   1.76   thorpej  */
     68   1.14       cgd 
     69    1.1       cgd /*
     70   1.13   mycroft  * Copyright (c) 1982, 1986, 1988, 1993
     71   1.13   mycroft  *	The Regents of the University of California.  All rights reserved.
     72    1.1       cgd  *
     73    1.1       cgd  * Redistribution and use in source and binary forms, with or without
     74    1.1       cgd  * modification, are permitted provided that the following conditions
     75    1.1       cgd  * are met:
     76    1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     77    1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     78    1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     79    1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     80    1.1       cgd  *    documentation and/or other materials provided with the distribution.
     81    1.1       cgd  * 3. All advertising materials mentioning features or use of this software
     82    1.1       cgd  *    must display the following acknowledgement:
     83    1.1       cgd  *	This product includes software developed by the University of
     84    1.1       cgd  *	California, Berkeley and its contributors.
     85    1.1       cgd  * 4. Neither the name of the University nor the names of its contributors
     86    1.1       cgd  *    may be used to endorse or promote products derived from this software
     87    1.1       cgd  *    without specific prior written permission.
     88    1.1       cgd  *
     89    1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     90    1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     91    1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     92    1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     93    1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     94    1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     95    1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     96    1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     97    1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     98    1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     99    1.1       cgd  * SUCH DAMAGE.
    100    1.1       cgd  *
    101   1.14       cgd  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
    102    1.1       cgd  */
    103   1.55    scottr 
    104   1.62      matt #include "opt_gateway.h"
    105   1.69       mrg #include "opt_pfil_hooks.h"
    106   1.91   thorpej #include "opt_ipsec.h"
    107   1.55    scottr #include "opt_mrouting.h"
    108  1.135   thorpej #include "opt_inet_csum.h"
    109    1.1       cgd 
    110    1.5   mycroft #include <sys/param.h>
    111    1.5   mycroft #include <sys/systm.h>
    112    1.5   mycroft #include <sys/malloc.h>
    113    1.5   mycroft #include <sys/mbuf.h>
    114    1.5   mycroft #include <sys/domain.h>
    115    1.5   mycroft #include <sys/protosw.h>
    116    1.5   mycroft #include <sys/socket.h>
    117   1.44   thorpej #include <sys/socketvar.h>
    118    1.5   mycroft #include <sys/errno.h>
    119    1.5   mycroft #include <sys/time.h>
    120    1.5   mycroft #include <sys/kernel.h>
    121   1.72   thorpej #include <sys/pool.h>
    122   1.28  christos #include <sys/sysctl.h>
    123    1.1       cgd 
    124    1.5   mycroft #include <net/if.h>
    125   1.44   thorpej #include <net/if_dl.h>
    126    1.5   mycroft #include <net/route.h>
    127   1.45       mrg #include <net/pfil.h>
    128    1.1       cgd 
    129    1.5   mycroft #include <netinet/in.h>
    130    1.5   mycroft #include <netinet/in_systm.h>
    131    1.5   mycroft #include <netinet/ip.h>
    132    1.5   mycroft #include <netinet/in_pcb.h>
    133    1.5   mycroft #include <netinet/in_var.h>
    134    1.5   mycroft #include <netinet/ip_var.h>
    135    1.5   mycroft #include <netinet/ip_icmp.h>
    136   1.89    itojun /* just for gif_ttl */
    137   1.89    itojun #include <netinet/in_gif.h>
    138   1.89    itojun #include "gif.h"
    139  1.111  jdolecek 
    140  1.111  jdolecek #ifdef MROUTING
    141  1.111  jdolecek #include <netinet/ip_mroute.h>
    142  1.111  jdolecek #endif
    143   1.89    itojun 
    144   1.89    itojun #ifdef IPSEC
    145   1.89    itojun #include <netinet6/ipsec.h>
    146   1.89    itojun #include <netkey/key.h>
    147   1.89    itojun #endif
    148   1.44   thorpej 
    149    1.1       cgd #ifndef	IPFORWARDING
    150    1.1       cgd #ifdef GATEWAY
    151    1.1       cgd #define	IPFORWARDING	1	/* forward IP packets not for us */
    152    1.1       cgd #else /* GATEWAY */
    153    1.1       cgd #define	IPFORWARDING	0	/* don't forward IP packets not for us */
    154    1.1       cgd #endif /* GATEWAY */
    155    1.1       cgd #endif /* IPFORWARDING */
    156    1.1       cgd #ifndef	IPSENDREDIRECTS
    157    1.1       cgd #define	IPSENDREDIRECTS	1
    158    1.1       cgd #endif
    159   1.26   thorpej #ifndef IPFORWSRCRT
    160   1.47       cjs #define	IPFORWSRCRT	1	/* forward source-routed packets */
    161   1.47       cjs #endif
    162   1.47       cjs #ifndef IPALLOWSRCRT
    163   1.48       mrg #define	IPALLOWSRCRT	1	/* allow source-routed packets */
    164   1.26   thorpej #endif
    165   1.53       kml #ifndef IPMTUDISC
    166   1.53       kml #define IPMTUDISC	0
    167   1.53       kml #endif
    168   1.60       kml #ifndef IPMTUDISCTIMEOUT
    169   1.61       kml #define IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
    170   1.60       kml #endif
    171   1.53       kml 
    172   1.27   thorpej /*
    173   1.27   thorpej  * Note: DIRECTED_BROADCAST is handled this way so that previous
    174   1.27   thorpej  * configuration using this option will Just Work.
    175   1.27   thorpej  */
    176   1.27   thorpej #ifndef IPDIRECTEDBCAST
    177   1.27   thorpej #ifdef DIRECTED_BROADCAST
    178   1.27   thorpej #define IPDIRECTEDBCAST	1
    179   1.27   thorpej #else
    180   1.27   thorpej #define	IPDIRECTEDBCAST	0
    181   1.27   thorpej #endif /* DIRECTED_BROADCAST */
    182   1.27   thorpej #endif /* IPDIRECTEDBCAST */
    183    1.1       cgd int	ipforwarding = IPFORWARDING;
    184    1.1       cgd int	ipsendredirects = IPSENDREDIRECTS;
    185   1.13   mycroft int	ip_defttl = IPDEFTTL;
    186   1.26   thorpej int	ip_forwsrcrt = IPFORWSRCRT;
    187   1.27   thorpej int	ip_directedbcast = IPDIRECTEDBCAST;
    188   1.47       cjs int	ip_allowsrcrt = IPALLOWSRCRT;
    189   1.53       kml int	ip_mtudisc = IPMTUDISC;
    190   1.60       kml u_int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
    191    1.1       cgd #ifdef DIAGNOSTIC
    192    1.1       cgd int	ipprintfs = 0;
    193    1.1       cgd #endif
    194    1.1       cgd 
    195   1.60       kml struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
    196   1.60       kml 
    197    1.1       cgd extern	struct domain inetdomain;
    198    1.1       cgd int	ipqmaxlen = IFQ_MAXLEN;
    199   1.22   mycroft struct	in_ifaddrhead in_ifaddr;
    200   1.57       tls struct	in_ifaddrhashhead *in_ifaddrhashtbl;
    201   1.13   mycroft struct	ifqueue ipintrq;
    202   1.63      matt struct	ipstat	ipstat;
    203   1.63      matt u_int16_t	ip_id;
    204   1.75   thorpej 
    205  1.121   thorpej #ifdef PFIL_HOOKS
    206  1.121   thorpej struct pfil_head inet_pfil_hook;
    207  1.121   thorpej #endif
    208  1.121   thorpej 
    209   1.63      matt struct ipqhead ipq;
    210   1.75   thorpej int	ipq_locked;
    211  1.131    itojun int	ip_nfragpackets = 0;
    212  1.133    itojun int	ip_maxfragpackets = 200;
    213   1.75   thorpej 
    214   1.75   thorpej static __inline int ipq_lock_try __P((void));
    215   1.75   thorpej static __inline void ipq_unlock __P((void));
    216   1.75   thorpej 
                         /*
                          * Make a single attempt to take the ipq lock.  The ipq_locked flag
                          * is tested and set while the interrupt priority is raised with
                          * splvm(); the previous level is restored before returning.
                          * Returns 1 if the lock was acquired, or 0 if it was already held.
                          * This function never waits for the lock.
                          */
    217   1.75   thorpej static __inline int
    218   1.75   thorpej ipq_lock_try()
    219   1.75   thorpej {
    220   1.75   thorpej 	int s;
    221   1.75   thorpej 
    222  1.132   thorpej 	/*
    223  1.132   thorpej 	 * Use splvm() -- we're blocking things that would cause
    224  1.132   thorpej 	 * mbuf allocation.
    225  1.132   thorpej 	 */
    226  1.132   thorpej 	s = splvm();
    227   1.75   thorpej 	if (ipq_locked) {
    228   1.75   thorpej 		splx(s);
    229   1.75   thorpej 		return (0);
    230   1.75   thorpej 	}
    231   1.75   thorpej 	ipq_locked = 1;
    232   1.75   thorpej 	splx(s);
    233   1.75   thorpej 	return (1);
    234   1.75   thorpej }
    235   1.75   thorpej 
                         /*
                          * Release the ipq lock by clearing ipq_locked.  The store is done
                          * with the interrupt priority raised via splvm(), and the previous
                          * level is restored afterwards.  No check is made here that the
                          * lock was actually held.
                          */
    236   1.75   thorpej static __inline void
    237   1.75   thorpej ipq_unlock()
    238   1.75   thorpej {
    239   1.75   thorpej 	int s;
    240   1.75   thorpej 
    241  1.132   thorpej 	s = splvm();
    242   1.75   thorpej 	ipq_locked = 0;
    243   1.75   thorpej 	splx(s);
    244   1.75   thorpej }
    245   1.75   thorpej 
    246   1.75   thorpej #ifdef DIAGNOSTIC
                         /*
                          * DIAGNOSTIC kernels: IPQ_LOCK() prints the file and line and then
                          * panics if ipq_lock_try() fails, i.e. the lock was already held.
                          */
    247   1.75   thorpej #define	IPQ_LOCK()							\
    248   1.75   thorpej do {									\
    249   1.75   thorpej 	if (ipq_lock_try() == 0) {					\
    250   1.75   thorpej 		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
    251   1.75   thorpej 		panic("ipq_lock");					\
    252   1.75   thorpej 	}								\
    253   1.75   thorpej } while (0)
                         /*
                          * IPQ_LOCK_CHECK() prints the file and line and then panics if
                          * ipq_locked is zero, i.e. the ipq lock is not currently held.
                          */
    254   1.75   thorpej #define	IPQ_LOCK_CHECK()						\
    255   1.75   thorpej do {									\
    256   1.75   thorpej 	if (ipq_locked == 0) {						\
    257   1.75   thorpej 		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
    258   1.75   thorpej 		panic("ipq lock check");				\
    259   1.75   thorpej 	}								\
    260   1.75   thorpej } while (0)
    261   1.75   thorpej #else
                         /*
                          * Non-DIAGNOSTIC kernels: the result of ipq_lock_try() is discarded,
                          * so IPQ_LOCK() continues whether or not the lock was obtained, and
                          * IPQ_LOCK_CHECK() expands to nothing.
                          */
    262   1.75   thorpej #define	IPQ_LOCK()		(void) ipq_lock_try()
    263   1.75   thorpej #define	IPQ_LOCK_CHECK()	/* nothing */
    264   1.75   thorpej #endif
    265   1.75   thorpej 
    266   1.75   thorpej #define	IPQ_UNLOCK()		ipq_unlock()
    267    1.1       cgd 
    268   1.72   thorpej struct pool ipqent_pool;
    269   1.72   thorpej 
    270  1.135   thorpej #ifdef INET_CSUM_COUNTERS
                         /*
                          * Optional event counters for IP header checksum handling.
                          * ip_input() bumps ip_hwcsum_bad / ip_hwcsum_ok when the checksum
                          * result comes from the mbuf's csum_flags, and ip_swcsum when it
                          * calls in_cksum() itself.  The counters are attached in ip_init().
                          */
    271  1.135   thorpej #include <sys/device.h>
    272  1.135   thorpej 
    273  1.135   thorpej struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    274  1.135   thorpej     NULL, "inet", "hwcsum bad");
    275  1.135   thorpej struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    276  1.135   thorpej     NULL, "inet", "hwcsum ok");
    277  1.135   thorpej struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    278  1.135   thorpej     NULL, "inet", "swcsum");
    279  1.135   thorpej 
                         /* Increment the ev_count member of the event counter at ev. */
    280  1.135   thorpej #define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++
    281  1.135   thorpej 
    282  1.135   thorpej #else
    283  1.135   thorpej 
                         /* Counters not configured: the increment expands to nothing. */
    284  1.135   thorpej #define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */
    285  1.135   thorpej 
    286  1.135   thorpej #endif /* INET_CSUM_COUNTERS */
    287  1.135   thorpej 
    288    1.1       cgd /*
    289    1.1       cgd  * We need to save the IP options in case a protocol wants to respond
    290    1.1       cgd  * to an incoming packet over the same route if the packet got here
    291    1.1       cgd  * using IP source routing.  This allows connection establishment and
    292    1.1       cgd  * maintenance when the remote end is on a network that is not known
    293    1.1       cgd  * to us.
    294    1.1       cgd  */
    295    1.1       cgd int	ip_nhops = 0;
    296    1.1       cgd static	struct ip_srcrt {
    297    1.1       cgd 	struct	in_addr dst;			/* final destination */
    298    1.1       cgd 	char	nop;				/* one NOP to align */
    299    1.1       cgd 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
    300    1.1       cgd 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
    301    1.1       cgd } ip_srcrt;
    302    1.1       cgd 
    303   1.13   mycroft static void save_rte __P((u_char *, struct in_addr));
    304   1.35   mycroft 
    305    1.1       cgd /*
    306    1.1       cgd  * IP initialization: fill in IP protocol switch table.
    307    1.1       cgd  * All protocols not implemented in kernel go to raw IP protocol handler.
                          *
                          * Besides ip_protox[], this also initializes the ipqent pool, the
                          * ipq list head, ip_id, the ipintrq length limit, and the in_ifaddr
                          * list and hash table.  Depending on configuration it additionally
                          * creates the ip_mtudisc timer queue, calls ipflow_init() (GATEWAY),
                          * registers the inet pfil head (PFIL_HOOKS) and attaches the
                          * checksum event counters (INET_CSUM_COUNTERS).
                          *
                          * Panics if no raw IP protosw entry can be found.
    308    1.1       cgd  */
    309    1.8   mycroft void
    310    1.1       cgd ip_init()
    311    1.1       cgd {
    312  1.109  augustss 	struct protosw *pr;
    313  1.109  augustss 	int i;
    314    1.1       cgd 
    315   1.72   thorpej 	pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
    316   1.72   thorpej 	    0, NULL, NULL, M_IPQ);
    317   1.72   thorpej 
    318    1.1       cgd 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
    319    1.1       cgd 	if (pr == 0)
    320    1.1       cgd 		panic("ip_init");
                         	/* Default every protocol number to the raw IP entry's index... */
    321    1.1       cgd 	for (i = 0; i < IPPROTO_MAX; i++)
    322    1.1       cgd 		ip_protox[i] = pr - inetsw;
                         	/*
                         	 * ...then override the slots of the PF_INET protocols in
                         	 * inetdomain that have a non-zero, non-raw protocol number.
                         	 */
    323    1.1       cgd 	for (pr = inetdomain.dom_protosw;
    324    1.1       cgd 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
    325    1.1       cgd 		if (pr->pr_domain->dom_family == PF_INET &&
    326    1.1       cgd 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
    327    1.1       cgd 			ip_protox[pr->pr_protocol] = pr - inetsw;
    328   1.25       cgd 	LIST_INIT(&ipq);
                         	/* Seed ip_id from the low 16 bits of the current time in seconds. */
    329    1.1       cgd 	ip_id = time.tv_sec & 0xffff;
    330    1.1       cgd 	ipintrq.ifq_maxlen = ipqmaxlen;
    331   1.22   mycroft 	TAILQ_INIT(&in_ifaddr);
    332  1.120        ad 	in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
    333  1.120        ad 	    M_WAITOK, &in_ifaddrhash);
                         	/* The timer queue is only created here when ip_mtudisc is non-zero. */
    334   1.60       kml 	if (ip_mtudisc != 0)
    335   1.60       kml 		ip_mtudisc_timeout_q =
    336   1.60       kml 		    rt_timer_queue_create(ip_mtudisc_timeout);
    337   1.73   thorpej #ifdef GATEWAY
    338   1.73   thorpej 	ipflow_init();
    339   1.73   thorpej #endif
    340  1.121   thorpej 
    341  1.121   thorpej #ifdef PFIL_HOOKS
    342  1.121   thorpej 	/* Register our Packet Filter hook. */
    343  1.126   thorpej 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
    344  1.126   thorpej 	inet_pfil_hook.ph_af   = AF_INET;
    345  1.121   thorpej 	i = pfil_head_register(&inet_pfil_hook);
                         	/* A registration failure is only reported; initialization continues. */
    346  1.121   thorpej 	if (i != 0)
    347  1.121   thorpej 		printf("ip_init: WARNING: unable to register pfil hook, "
    348  1.121   thorpej 		    "error %d\n", i);
    349  1.121   thorpej #endif /* PFIL_HOOKS */
    350  1.135   thorpej 
    351  1.135   thorpej #ifdef INET_CSUM_COUNTERS
    352  1.135   thorpej 	evcnt_attach_static(&ip_hwcsum_bad);
    353  1.135   thorpej 	evcnt_attach_static(&ip_hwcsum_ok);
    354  1.135   thorpej 	evcnt_attach_static(&ip_swcsum);
    355  1.135   thorpej #endif /* INET_CSUM_COUNTERS */
    356    1.1       cgd }
    357    1.1       cgd 
    358    1.1       cgd struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
    359    1.1       cgd struct	route ipforward_rt;
    360    1.1       cgd 
    361    1.1       cgd /*
    362   1.89    itojun  * IP software interrupt routine
                          *
                          * Drains ipintrq.  Each packet is dequeued with the interrupt
                          * priority raised via splnet(); the previous level is restored
                          * before the packet is handed to ip_input().  Returns as soon as
                          * a dequeue yields no packet.
    363   1.89    itojun  */
    364   1.89    itojun void
    365   1.89    itojun ipintr()
    366   1.89    itojun {
    367   1.89    itojun 	int s;
    368   1.89    itojun 	struct mbuf *m;
    369   1.89    itojun 
    370   1.89    itojun 	while (1) {
    371  1.132   thorpej 		s = splnet();
    372   1.89    itojun 		IF_DEQUEUE(&ipintrq, m);
    373   1.89    itojun 		splx(s);
    374   1.89    itojun 		if (m == 0)
    375   1.89    itojun 			return;
    376   1.89    itojun 		ip_input(m);
    377   1.89    itojun 	}
    378   1.89    itojun }
    379   1.89    itojun 
    380   1.89    itojun /*
    381    1.1       cgd  * Ip input routine.  Checksum and byte swap header.  If fragmented
    382    1.1       cgd  * try to reassemble.  Process options.  Pass to next level.
    383    1.1       cgd  */
    384    1.8   mycroft void
    385   1.89    itojun ip_input(struct mbuf *m)
    386    1.1       cgd {
    387  1.109  augustss 	struct ip *ip = NULL;
    388  1.109  augustss 	struct ipq *fp;
    389  1.109  augustss 	struct in_ifaddr *ia;
    390  1.109  augustss 	struct ifaddr *ifa;
    391   1.25       cgd 	struct ipqent *ipqe;
    392   1.89    itojun 	int hlen = 0, mff, len;
    393  1.100    itojun 	int downmatch;
    394    1.1       cgd 
    395    1.1       cgd #ifdef	DIAGNOSTIC
    396    1.1       cgd 	if ((m->m_flags & M_PKTHDR) == 0)
    397    1.1       cgd 		panic("ipintr no HDR");
    398    1.1       cgd #endif
    399   1.89    itojun #ifdef IPSEC
    400   1.89    itojun 	/*
    401   1.89    itojun 	 * should the inner packet be considered authentic?
    402   1.89    itojun 	 * see comment in ah4_input().
    403   1.89    itojun 	 */
    404   1.89    itojun 	if (m) {
    405   1.89    itojun 		m->m_flags &= ~M_AUTHIPHDR;
    406   1.89    itojun 		m->m_flags &= ~M_AUTHIPDGM;
    407   1.89    itojun 	}
    408   1.89    itojun #endif
    409    1.1       cgd 	/*
    410    1.1       cgd 	 * If no IP addresses have been set yet but the interfaces
    411    1.1       cgd 	 * are receiving, can't do anything with incoming packets yet.
    412    1.1       cgd 	 */
    413   1.22   mycroft 	if (in_ifaddr.tqh_first == 0)
    414    1.1       cgd 		goto bad;
    415    1.1       cgd 	ipstat.ips_total++;
    416    1.1       cgd 	if (m->m_len < sizeof (struct ip) &&
    417    1.1       cgd 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
    418    1.1       cgd 		ipstat.ips_toosmall++;
    419   1.89    itojun 		return;
    420    1.1       cgd 	}
    421    1.1       cgd 	ip = mtod(m, struct ip *);
    422   1.13   mycroft 	if (ip->ip_v != IPVERSION) {
    423   1.13   mycroft 		ipstat.ips_badvers++;
    424   1.13   mycroft 		goto bad;
    425   1.13   mycroft 	}
    426    1.1       cgd 	hlen = ip->ip_hl << 2;
    427    1.1       cgd 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
    428    1.1       cgd 		ipstat.ips_badhlen++;
    429    1.1       cgd 		goto bad;
    430    1.1       cgd 	}
    431    1.1       cgd 	if (hlen > m->m_len) {
    432    1.1       cgd 		if ((m = m_pullup(m, hlen)) == 0) {
    433    1.1       cgd 			ipstat.ips_badhlen++;
    434   1.89    itojun 			return;
    435    1.1       cgd 		}
    436    1.1       cgd 		ip = mtod(m, struct ip *);
    437    1.1       cgd 	}
    438   1.98   thorpej 
    439   1.85       hwr 	/*
    440   1.99   thorpej 	 * RFC1122: packets with a multicast source address are
    441   1.98   thorpej 	 * not allowed.
    442   1.85       hwr 	 */
    443   1.85       hwr 	if (IN_MULTICAST(ip->ip_src.s_addr)) {
    444  1.130    itojun 		ipstat.ips_badaddr++;
    445   1.85       hwr 		goto bad;
    446  1.129    itojun 	}
    447  1.129    itojun 
    448  1.129    itojun 	/* 127/8 must not appear on wire - RFC1122 */
    449  1.129    itojun 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
    450  1.129    itojun 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
    451  1.130    itojun 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
    452  1.130    itojun 			ipstat.ips_badaddr++;
    453  1.129    itojun 			goto bad;
    454  1.130    itojun 		}
    455   1.85       hwr 	}
    456   1.85       hwr 
    457  1.135   thorpej 	switch (m->m_pkthdr.csum_flags &
    458  1.137   thorpej 		((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
    459  1.135   thorpej 		 M_CSUM_IPv4_BAD)) {
    460  1.135   thorpej 	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
    461  1.135   thorpej 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
    462  1.135   thorpej 		goto badcsum;
    463  1.135   thorpej 
    464  1.135   thorpej 	case M_CSUM_IPv4:
    465  1.135   thorpej 		/* Checksum was okay. */
    466  1.135   thorpej 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
    467  1.135   thorpej 		break;
    468  1.135   thorpej 
    469  1.135   thorpej 	default:
    470  1.135   thorpej 		/* Must compute it ourselves. */
    471  1.135   thorpej 		INET_CSUM_COUNTER_INCR(&ip_swcsum);
    472  1.135   thorpej 		if (in_cksum(m, hlen) != 0)
    473  1.135   thorpej 			goto bad;
    474  1.135   thorpej 		break;
    475    1.1       cgd 	}
    476    1.1       cgd 
    477  1.121   thorpej 	/* Retrieve the packet length. */
    478  1.121   thorpej 	len = ntohs(ip->ip_len);
    479   1.81     proff 
    480   1.81     proff 	/*
    481   1.81     proff 	 * Check for additional length bogosity
    482   1.81     proff 	 */
    483   1.84     proff 	if (len < hlen) {
    484   1.81     proff 	 	ipstat.ips_badlen++;
    485   1.81     proff 		goto bad;
    486   1.81     proff 	}
    487    1.1       cgd 
    488    1.1       cgd 	/*
    489    1.1       cgd 	 * Check that the amount of data in the buffers
    490    1.1       cgd 	 * is at least as much as the IP header would have us expect.
    491    1.1       cgd 	 * Trim mbufs if longer than we expect.
    492    1.1       cgd 	 * Drop packet if shorter than we expect.
    493    1.1       cgd 	 */
    494   1.35   mycroft 	if (m->m_pkthdr.len < len) {
    495    1.1       cgd 		ipstat.ips_tooshort++;
    496    1.1       cgd 		goto bad;
    497    1.1       cgd 	}
    498   1.35   mycroft 	if (m->m_pkthdr.len > len) {
    499    1.1       cgd 		if (m->m_len == m->m_pkthdr.len) {
    500   1.35   mycroft 			m->m_len = len;
    501   1.35   mycroft 			m->m_pkthdr.len = len;
    502    1.1       cgd 		} else
    503   1.35   mycroft 			m_adj(m, len - m->m_pkthdr.len);
    504    1.1       cgd 	}
    505    1.1       cgd 
    506   1.94    itojun #ifdef IPSEC
    507   1.94    itojun 	/* ipflow (IP fast forwarding) is not compatible with IPsec. */
    508   1.94    itojun 	m->m_flags &= ~M_CANFASTFWD;
    509   1.94    itojun #else
    510   1.64   thorpej 	/*
    511   1.64   thorpej 	 * Assume that we can create a fast-forward IP flow entry
    512   1.64   thorpej 	 * based on this packet.
    513   1.64   thorpej 	 */
    514   1.64   thorpej 	m->m_flags |= M_CANFASTFWD;
    515   1.94    itojun #endif
    516   1.64   thorpej 
    517   1.36       mrg #ifdef PFIL_HOOKS
    518   1.33       mrg 	/*
    519   1.64   thorpej 	 * Run through list of hooks for input packets.  If there are any
    520   1.64   thorpej 	 * filters which require that additional packets in the flow are
    521   1.64   thorpej 	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
    522   1.64   thorpej 	 * Note that filters must _never_ set this flag, as another filter
    523   1.64   thorpej 	 * in the list may have previously cleared it.
    524   1.33       mrg 	 */
    525  1.127    itojun 	/*
    526  1.127    itojun 	 * let ipfilter look at packet on the wire,
    527  1.127    itojun 	 * not the decapsulated packet.
    528  1.127    itojun 	 */
    529  1.127    itojun #ifdef IPSEC
    530  1.136    itojun 	if (!ipsec_getnhist(m))
    531  1.127    itojun #else
    532  1.127    itojun 	if (1)
    533  1.127    itojun #endif
    534  1.127    itojun 	{
    535  1.127    itojun 		if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
    536  1.127    itojun 				   PFIL_IN) != 0)
    537  1.121   thorpej 		return;
    538  1.127    itojun 		if (m == NULL)
    539  1.127    itojun 			return;
    540  1.127    itojun 		ip = mtod(m, struct ip *);
    541  1.127    itojun 	}
    542   1.36       mrg #endif /* PFIL_HOOKS */
    543  1.123   thorpej 
    544  1.123   thorpej #ifdef ALTQ
    545  1.123   thorpej 	/* XXX Temporary until ALTQ is changed to use a pfil hook */
    546  1.123   thorpej 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
    547  1.123   thorpej 		/* packet dropped by traffic conditioner */
    548  1.123   thorpej 		return;
    549  1.123   thorpej 	}
    550  1.123   thorpej #endif
    551  1.121   thorpej 
    552  1.121   thorpej 	/*
    553  1.121   thorpej 	 * Convert fields to host representation.
    554  1.121   thorpej 	 */
    555  1.121   thorpej 	NTOHS(ip->ip_len);
    556  1.121   thorpej 	NTOHS(ip->ip_off);
    557   1.33       mrg 
    558    1.1       cgd 	/*
    559    1.1       cgd 	 * Process options and, if not destined for us,
    560    1.1       cgd 	 * ship it on.  ip_dooptions returns 1 when an
    561    1.1       cgd 	 * error was detected (causing an icmp message
    562    1.1       cgd 	 * to be sent and the original packet to be freed).
    563    1.1       cgd 	 */
    564    1.1       cgd 	ip_nhops = 0;		/* for source routed packets */
    565    1.1       cgd 	if (hlen > sizeof (struct ip) && ip_dooptions(m))
    566   1.89    itojun 		return;
    567    1.1       cgd 
    568    1.1       cgd 	/*
    569    1.1       cgd 	 * Check our list of addresses, to see if the packet is for us.
    570  1.100    itojun 	 *
    571  1.100    itojun 	 * Traditional 4.4BSD did not consult IFF_UP at all.
    572  1.100    itojun 	 * The behavior here is to treat addresses on !IFF_UP interface
    573  1.100    itojun 	 * as not mine.
    574    1.1       cgd 	 */
    575  1.100    itojun 	downmatch = 0;
    576   1.97    itojun 	for (ia = IN_IFADDR_HASH(ip->ip_dst.s_addr).lh_first;
    577   1.97    itojun 	     ia != NULL;
    578   1.97    itojun 	     ia = ia->ia_hash.le_next) {
    579   1.97    itojun 		if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
    580   1.97    itojun 			if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
    581   1.97    itojun 				break;
    582  1.100    itojun 			else
    583  1.100    itojun 				downmatch++;
    584   1.97    itojun 		}
    585   1.97    itojun 	}
    586   1.86   thorpej 	if (ia != NULL)
    587   1.86   thorpej 		goto ours;
    588   1.57       tls 	if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
    589   1.57       tls 		for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
    590   1.57       tls 		    ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
    591   1.57       tls 			if (ifa->ifa_addr->sa_family != AF_INET) continue;
    592   1.57       tls 			ia = ifatoia(ifa);
    593   1.35   mycroft 			if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
    594   1.35   mycroft 			    in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
    595   1.20   mycroft 			    /*
    596   1.20   mycroft 			     * Look for all-0's host part (old broadcast addr),
    597   1.20   mycroft 			     * either for subnet or net.
    598   1.20   mycroft 			     */
    599   1.20   mycroft 			    ip->ip_dst.s_addr == ia->ia_subnet ||
    600   1.18   mycroft 			    ip->ip_dst.s_addr == ia->ia_net)
    601    1.1       cgd 				goto ours;
    602   1.57       tls 			/*
    603   1.57       tls 			 * An interface with IP address zero accepts
    604   1.57       tls 			 * all packets that arrive on that interface.
    605   1.57       tls 			 */
    606   1.57       tls 			if (in_nullhost(ia->ia_addr.sin_addr))
    607   1.57       tls 				goto ours;
    608    1.1       cgd 		}
    609    1.1       cgd 	}
    610   1.18   mycroft 	if (IN_MULTICAST(ip->ip_dst.s_addr)) {
    611    1.4   hpeyerl 		struct in_multi *inm;
    612    1.4   hpeyerl #ifdef MROUTING
    613    1.4   hpeyerl 		extern struct socket *ip_mrouter;
    614   1.10    brezak 
    615   1.10    brezak 		if (m->m_flags & M_EXT) {
    616   1.10    brezak 			if ((m = m_pullup(m, hlen)) == 0) {
    617   1.10    brezak 				ipstat.ips_toosmall++;
    618   1.89    itojun 				return;
    619   1.10    brezak 			}
    620   1.10    brezak 			ip = mtod(m, struct ip *);
    621   1.10    brezak 		}
    622    1.4   hpeyerl 
    623    1.4   hpeyerl 		if (ip_mrouter) {
    624    1.4   hpeyerl 			/*
    625    1.4   hpeyerl 			 * If we are acting as a multicast router, all
    626    1.4   hpeyerl 			 * incoming multicast packets are passed to the
    627    1.4   hpeyerl 			 * kernel-level multicast forwarding function.
    628    1.4   hpeyerl 			 * The packet is returned (relatively) intact; if
    629    1.4   hpeyerl 			 * ip_mforward() returns a non-zero value, the packet
    630    1.4   hpeyerl 			 * must be discarded, else it may be accepted below.
    631    1.4   hpeyerl 			 *
    632    1.4   hpeyerl 			 * (The IP ident field is put in the same byte order
    633    1.4   hpeyerl 			 * as expected when ip_mforward() is called from
    634    1.4   hpeyerl 			 * ip_output().)
    635    1.4   hpeyerl 			 */
    636   1.13   mycroft 			if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
    637   1.13   mycroft 				ipstat.ips_cantforward++;
    638    1.4   hpeyerl 				m_freem(m);
    639   1.89    itojun 				return;
    640    1.4   hpeyerl 			}
    641    1.4   hpeyerl 
    642    1.4   hpeyerl 			/*
    643    1.4   hpeyerl 			 * The process-level routing demon needs to receive
    644    1.4   hpeyerl 			 * all multicast IGMP packets, whether or not this
    645    1.4   hpeyerl 			 * host belongs to their destination groups.
    646    1.4   hpeyerl 			 */
    647    1.4   hpeyerl 			if (ip->ip_p == IPPROTO_IGMP)
    648    1.4   hpeyerl 				goto ours;
    649   1.13   mycroft 			ipstat.ips_forward++;
    650    1.4   hpeyerl 		}
    651    1.4   hpeyerl #endif
    652    1.4   hpeyerl 		/*
    653    1.4   hpeyerl 		 * See if we belong to the destination multicast group on the
    654    1.4   hpeyerl 		 * arrival interface.
    655    1.4   hpeyerl 		 */
    656    1.4   hpeyerl 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
    657    1.4   hpeyerl 		if (inm == NULL) {
    658   1.13   mycroft 			ipstat.ips_cantforward++;
    659    1.4   hpeyerl 			m_freem(m);
    660   1.89    itojun 			return;
    661    1.4   hpeyerl 		}
    662    1.4   hpeyerl 		goto ours;
    663    1.4   hpeyerl 	}
    664   1.19   mycroft 	if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    665   1.35   mycroft 	    in_nullhost(ip->ip_dst))
    666    1.1       cgd 		goto ours;
    667    1.1       cgd 
    668    1.1       cgd 	/*
    669    1.1       cgd 	 * Not for us; forward if possible and desirable.
    670    1.1       cgd 	 */
    671    1.1       cgd 	if (ipforwarding == 0) {
    672    1.1       cgd 		ipstat.ips_cantforward++;
    673    1.1       cgd 		m_freem(m);
    674  1.100    itojun 	} else {
    675  1.100    itojun 		/*
    676  1.100    itojun 		 * If ip_dst matched any of my address on !IFF_UP interface,
    677  1.100    itojun 		 * and there's no IFF_UP interface that matches ip_dst,
    678  1.100    itojun 		 * send icmp unreach.  Forwarding it will result in in-kernel
    679  1.100    itojun 		 * forwarding loop till TTL goes to 0.
    680  1.100    itojun 		 */
    681  1.100    itojun 		if (downmatch) {
    682  1.100    itojun 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
    683  1.100    itojun 			ipstat.ips_cantforward++;
    684  1.100    itojun 			return;
    685  1.100    itojun 		}
    686    1.1       cgd 		ip_forward(m, 0);
    687  1.100    itojun 	}
    688   1.89    itojun 	return;
    689    1.1       cgd 
    690    1.1       cgd ours:
    691    1.1       cgd 	/*
    692    1.1       cgd 	 * If offset or IP_MF are set, must reassemble.
    693    1.1       cgd 	 * Otherwise, nothing need be done.
    694    1.1       cgd 	 * (We could look in the reassembly queue to see
    695    1.1       cgd 	 * if the packet was previously fragmented,
    696    1.1       cgd 	 * but it's not worth the time; just let them time out.)
    697    1.1       cgd 	 */
    698   1.37     perry 	if (ip->ip_off & ~(IP_DF|IP_RF)) {
    699    1.1       cgd 		/*
    700    1.1       cgd 		 * Look for queue of fragments
    701    1.1       cgd 		 * of this datagram.
    702    1.1       cgd 		 */
    703   1.75   thorpej 		IPQ_LOCK();
    704   1.25       cgd 		for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next)
    705    1.1       cgd 			if (ip->ip_id == fp->ipq_id &&
    706   1.35   mycroft 			    in_hosteq(ip->ip_src, fp->ipq_src) &&
    707   1.35   mycroft 			    in_hosteq(ip->ip_dst, fp->ipq_dst) &&
    708    1.1       cgd 			    ip->ip_p == fp->ipq_p)
    709    1.1       cgd 				goto found;
    710    1.1       cgd 		fp = 0;
    711    1.1       cgd found:
    712    1.1       cgd 
    713    1.1       cgd 		/*
    714    1.1       cgd 		 * Adjust ip_len to not reflect header,
    715   1.25       cgd 		 * set ipqe_mff if more fragments are expected,
    716    1.1       cgd 		 * convert offset of this to bytes.
    717    1.1       cgd 		 */
    718    1.1       cgd 		ip->ip_len -= hlen;
    719   1.25       cgd 		mff = (ip->ip_off & IP_MF) != 0;
    720   1.25       cgd 		if (mff) {
    721   1.16       cgd 		        /*
    722   1.16       cgd 		         * Make sure that fragments have a data length
    723   1.16       cgd 			 * that's a non-zero multiple of 8 bytes.
    724   1.16       cgd 		         */
    725   1.17       cgd 			if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
    726   1.16       cgd 				ipstat.ips_badfrags++;
    727   1.75   thorpej 				IPQ_UNLOCK();
    728   1.16       cgd 				goto bad;
    729   1.16       cgd 			}
    730   1.16       cgd 		}
    731    1.1       cgd 		ip->ip_off <<= 3;
    732    1.1       cgd 
    733    1.1       cgd 		/*
    734    1.1       cgd 		 * If datagram marked as having more fragments
    735    1.1       cgd 		 * or if this is not the first fragment,
    736    1.1       cgd 		 * attempt reassembly; if it succeeds, proceed.
    737    1.1       cgd 		 */
    738   1.25       cgd 		if (mff || ip->ip_off) {
    739    1.1       cgd 			ipstat.ips_fragments++;
    740   1.72   thorpej 			ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
    741   1.25       cgd 			if (ipqe == NULL) {
    742   1.25       cgd 				ipstat.ips_rcvmemdrop++;
    743   1.75   thorpej 				IPQ_UNLOCK();
    744   1.25       cgd 				goto bad;
    745   1.25       cgd 			}
    746   1.25       cgd 			ipqe->ipqe_mff = mff;
    747   1.50   thorpej 			ipqe->ipqe_m = m;
    748   1.25       cgd 			ipqe->ipqe_ip = ip;
    749   1.50   thorpej 			m = ip_reass(ipqe, fp);
    750   1.75   thorpej 			if (m == 0) {
    751   1.75   thorpej 				IPQ_UNLOCK();
    752   1.89    itojun 				return;
    753   1.75   thorpej 			}
    754   1.13   mycroft 			ipstat.ips_reassembled++;
    755   1.50   thorpej 			ip = mtod(m, struct ip *);
    756   1.74   thorpej 			hlen = ip->ip_hl << 2;
    757   1.79   mycroft 			ip->ip_len += hlen;
    758    1.1       cgd 		} else
    759    1.1       cgd 			if (fp)
    760    1.1       cgd 				ip_freef(fp);
    761   1.75   thorpej 		IPQ_UNLOCK();
    762   1.79   mycroft 	}
    763  1.128    itojun 
    764  1.128    itojun #ifdef IPSEC
    765  1.128    itojun 	/*
    766  1.128    itojun 	 * enforce IPsec policy checking if we are seeing last header.
    767  1.128    itojun 	 * note that we do not visit this with protocols with pcb layer
    768  1.128    itojun 	 * code - like udp/tcp/raw ip.
    769  1.128    itojun 	 */
    770  1.128    itojun 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
    771  1.128    itojun 	    ipsec4_in_reject(m, NULL)) {
    772  1.128    itojun 		ipsecstat.in_polvio++;
    773  1.128    itojun 		goto bad;
    774  1.128    itojun 	}
    775  1.128    itojun #endif
    776    1.1       cgd 
    777    1.1       cgd 	/*
    778    1.1       cgd 	 * Switch out to protocol's input routine.
    779    1.1       cgd 	 */
    780   1.82     aidan #if IFA_STATS
    781  1.122    itojun 	if (ia && ip)
    782  1.122    itojun 		ia->ia_ifa.ifa_data.ifad_inbytes += ip->ip_len;
    783   1.82     aidan #endif
    784    1.1       cgd 	ipstat.ips_delivered++;
    785   1.89    itojun     {
    786   1.89    itojun 	int off = hlen, nh = ip->ip_p;
    787   1.89    itojun 
    788   1.89    itojun 	(*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
    789   1.89    itojun 	return;
    790   1.89    itojun     }
    791    1.1       cgd bad:
    792    1.1       cgd 	m_freem(m);
    793  1.135   thorpej 	return;
    794  1.135   thorpej 
    795  1.135   thorpej badcsum:
    796  1.135   thorpej 	ipstat.ips_badsum++;
    797  1.135   thorpej 	m_freem(m);
    798    1.1       cgd }
    799    1.1       cgd 
    800    1.1       cgd /*
    801    1.1       cgd  * Take incoming datagram fragment and try to
    802    1.1       cgd  * reassemble it into whole datagram.  If a chain for
    803    1.1       cgd  * reassembly of this datagram already exists, then it
    804    1.1       cgd  * is given as fp; otherwise have to make a chain.
    805    1.1       cgd  */
    806   1.50   thorpej struct mbuf *
    807   1.25       cgd ip_reass(ipqe, fp)
    808  1.109  augustss 	struct ipqent *ipqe;
    809  1.109  augustss 	struct ipq *fp;
    810    1.1       cgd {
    811  1.109  augustss 	struct mbuf *m = ipqe->ipqe_m;
    812  1.109  augustss 	struct ipqent *nq, *p, *q;
    813   1.25       cgd 	struct ip *ip;
    814    1.1       cgd 	struct mbuf *t;
    815   1.25       cgd 	int hlen = ipqe->ipqe_ip->ip_hl << 2;
    816    1.1       cgd 	int i, next;
    817    1.1       cgd 
    818   1.75   thorpej 	IPQ_LOCK_CHECK();
    819   1.75   thorpej 
    820    1.1       cgd 	/*
    821    1.1       cgd 	 * Presence of header sizes in mbufs
    822    1.1       cgd 	 * would confuse code below.
    823    1.1       cgd 	 */
    824    1.1       cgd 	m->m_data += hlen;
    825    1.1       cgd 	m->m_len -= hlen;
    826    1.1       cgd 
    827    1.1       cgd 	/*
    828    1.1       cgd 	 * If first fragment to arrive, create a reassembly queue.
    829    1.1       cgd 	 */
    830    1.1       cgd 	if (fp == 0) {
    831  1.131    itojun 		/*
    832  1.131    itojun 		 * Enforce upper bound on number of fragmented packets
    833  1.131    itojun 		 * for which we attempt reassembly;
    834  1.131    itojun 		 * If maxfrag is 0, never accept fragments.
    835  1.131    itojun 		 * If maxfrag is -1, accept all fragments without limitation.
    836  1.131    itojun 		 */
    837  1.131    itojun 		if (ip_maxfragpackets < 0)
    838  1.131    itojun 			;
    839  1.131    itojun 		else if (ip_nfragpackets >= ip_maxfragpackets)
    840  1.131    itojun 			goto dropfrag;
    841  1.131    itojun 		ip_nfragpackets++;
    842   1.50   thorpej 		MALLOC(fp, struct ipq *, sizeof (struct ipq),
    843   1.50   thorpej 		    M_FTABLE, M_NOWAIT);
    844   1.50   thorpej 		if (fp == NULL)
    845    1.1       cgd 			goto dropfrag;
    846   1.25       cgd 		LIST_INSERT_HEAD(&ipq, fp, ipq_q);
    847    1.1       cgd 		fp->ipq_ttl = IPFRAGTTL;
    848   1.25       cgd 		fp->ipq_p = ipqe->ipqe_ip->ip_p;
    849   1.25       cgd 		fp->ipq_id = ipqe->ipqe_ip->ip_id;
    850   1.25       cgd 		LIST_INIT(&fp->ipq_fragq);
    851   1.25       cgd 		fp->ipq_src = ipqe->ipqe_ip->ip_src;
    852   1.25       cgd 		fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
    853   1.25       cgd 		p = NULL;
    854    1.1       cgd 		goto insert;
    855    1.1       cgd 	}
    856    1.1       cgd 
    857    1.1       cgd 	/*
    858    1.1       cgd 	 * Find a segment which begins after this one does.
    859    1.1       cgd 	 */
    860   1.25       cgd 	for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
    861   1.25       cgd 	    p = q, q = q->ipqe_q.le_next)
    862   1.25       cgd 		if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off)
    863    1.1       cgd 			break;
    864    1.1       cgd 
    865    1.1       cgd 	/*
    866    1.1       cgd 	 * If there is a preceding segment, it may provide some of
    867    1.1       cgd 	 * our data already.  If so, drop the data from the incoming
    868    1.1       cgd 	 * segment.  If it provides all of our data, drop us.
    869    1.1       cgd 	 */
    870   1.25       cgd 	if (p != NULL) {
    871   1.25       cgd 		i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len -
    872   1.25       cgd 		    ipqe->ipqe_ip->ip_off;
    873    1.1       cgd 		if (i > 0) {
    874   1.25       cgd 			if (i >= ipqe->ipqe_ip->ip_len)
    875    1.1       cgd 				goto dropfrag;
    876   1.50   thorpej 			m_adj(ipqe->ipqe_m, i);
    877   1.25       cgd 			ipqe->ipqe_ip->ip_off += i;
    878   1.25       cgd 			ipqe->ipqe_ip->ip_len -= i;
    879    1.1       cgd 		}
    880    1.1       cgd 	}
    881    1.1       cgd 
    882    1.1       cgd 	/*
    883    1.1       cgd 	 * While we overlap succeeding segments trim them or,
    884    1.1       cgd 	 * if they are completely covered, dequeue them.
    885    1.1       cgd 	 */
    886   1.25       cgd 	for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len >
    887   1.25       cgd 	    q->ipqe_ip->ip_off; q = nq) {
    888   1.25       cgd 		i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) -
    889   1.25       cgd 		    q->ipqe_ip->ip_off;
    890   1.25       cgd 		if (i < q->ipqe_ip->ip_len) {
    891   1.25       cgd 			q->ipqe_ip->ip_len -= i;
    892   1.25       cgd 			q->ipqe_ip->ip_off += i;
    893   1.50   thorpej 			m_adj(q->ipqe_m, i);
    894    1.1       cgd 			break;
    895    1.1       cgd 		}
    896   1.25       cgd 		nq = q->ipqe_q.le_next;
    897   1.50   thorpej 		m_freem(q->ipqe_m);
    898   1.25       cgd 		LIST_REMOVE(q, ipqe_q);
    899   1.72   thorpej 		pool_put(&ipqent_pool, q);
    900    1.1       cgd 	}
    901    1.1       cgd 
    902    1.1       cgd insert:
    903    1.1       cgd 	/*
    904    1.1       cgd 	 * Stick new segment in its place;
    905    1.1       cgd 	 * check for complete reassembly.
    906    1.1       cgd 	 */
    907   1.25       cgd 	if (p == NULL) {
    908   1.25       cgd 		LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
    909   1.25       cgd 	} else {
    910   1.25       cgd 		LIST_INSERT_AFTER(p, ipqe, ipqe_q);
    911   1.25       cgd 	}
    912    1.1       cgd 	next = 0;
    913   1.25       cgd 	for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
    914   1.25       cgd 	    p = q, q = q->ipqe_q.le_next) {
    915   1.25       cgd 		if (q->ipqe_ip->ip_off != next)
    916    1.1       cgd 			return (0);
    917   1.25       cgd 		next += q->ipqe_ip->ip_len;
    918    1.1       cgd 	}
    919   1.25       cgd 	if (p->ipqe_mff)
    920    1.1       cgd 		return (0);
    921    1.1       cgd 
    922    1.1       cgd 	/*
    923   1.41   thorpej 	 * Reassembly is complete.  Check for a bogus message size and
    924   1.41   thorpej 	 * concatenate fragments.
    925    1.1       cgd 	 */
    926   1.25       cgd 	q = fp->ipq_fragq.lh_first;
    927   1.25       cgd 	ip = q->ipqe_ip;
    928   1.41   thorpej 	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
    929   1.41   thorpej 		ipstat.ips_toolong++;
    930   1.41   thorpej 		ip_freef(fp);
    931   1.41   thorpej 		return (0);
    932   1.41   thorpej 	}
    933   1.50   thorpej 	m = q->ipqe_m;
    934    1.1       cgd 	t = m->m_next;
    935    1.1       cgd 	m->m_next = 0;
    936    1.1       cgd 	m_cat(m, t);
    937   1.25       cgd 	nq = q->ipqe_q.le_next;
    938   1.72   thorpej 	pool_put(&ipqent_pool, q);
    939   1.25       cgd 	for (q = nq; q != NULL; q = nq) {
    940   1.50   thorpej 		t = q->ipqe_m;
    941   1.25       cgd 		nq = q->ipqe_q.le_next;
    942   1.72   thorpej 		pool_put(&ipqent_pool, q);
    943    1.1       cgd 		m_cat(m, t);
    944    1.1       cgd 	}
    945    1.1       cgd 
    946    1.1       cgd 	/*
    947    1.1       cgd 	 * Create header for new ip packet by
    948    1.1       cgd 	 * modifying header of first packet;
    949    1.1       cgd 	 * dequeue and discard fragment reassembly header.
    950    1.1       cgd 	 * Make header visible.
    951    1.1       cgd 	 */
    952    1.1       cgd 	ip->ip_len = next;
    953   1.25       cgd 	ip->ip_src = fp->ipq_src;
    954   1.25       cgd 	ip->ip_dst = fp->ipq_dst;
    955   1.25       cgd 	LIST_REMOVE(fp, ipq_q);
    956   1.50   thorpej 	FREE(fp, M_FTABLE);
    957  1.131    itojun 	ip_nfragpackets--;
    958    1.1       cgd 	m->m_len += (ip->ip_hl << 2);
    959    1.1       cgd 	m->m_data -= (ip->ip_hl << 2);
    960    1.1       cgd 	/* some debugging cruft by sklower, below, will go away soon */
    961    1.1       cgd 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
    962  1.109  augustss 		int plen = 0;
    963   1.50   thorpej 		for (t = m; t; t = t->m_next)
    964   1.50   thorpej 			plen += t->m_len;
    965   1.50   thorpej 		m->m_pkthdr.len = plen;
    966    1.1       cgd 	}
    967   1.50   thorpej 	return (m);
    968    1.1       cgd 
    969    1.1       cgd dropfrag:
    970    1.1       cgd 	ipstat.ips_fragdropped++;
    971    1.1       cgd 	m_freem(m);
    972   1.72   thorpej 	pool_put(&ipqent_pool, ipqe);
    973    1.1       cgd 	return (0);
    974    1.1       cgd }
    975    1.1       cgd 
    976    1.1       cgd /*
    977    1.1       cgd  * Free a fragment reassembly header and all
    978    1.1       cgd  * associated datagrams.
    979    1.1       cgd  */
    980    1.8   mycroft void
    981    1.1       cgd ip_freef(fp)
    982    1.1       cgd 	struct ipq *fp;
    983    1.1       cgd {
    984  1.109  augustss 	struct ipqent *q, *p;
    985    1.1       cgd 
    986   1.75   thorpej 	IPQ_LOCK_CHECK();
    987   1.75   thorpej 
    988   1.25       cgd 	for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) {
    989   1.25       cgd 		p = q->ipqe_q.le_next;
    990   1.50   thorpej 		m_freem(q->ipqe_m);
    991   1.25       cgd 		LIST_REMOVE(q, ipqe_q);
    992   1.72   thorpej 		pool_put(&ipqent_pool, q);
    993    1.1       cgd 	}
    994   1.25       cgd 	LIST_REMOVE(fp, ipq_q);
    995   1.50   thorpej 	FREE(fp, M_FTABLE);
    996  1.131    itojun 	ip_nfragpackets--;
    997    1.1       cgd }
    998    1.1       cgd 
    999    1.1       cgd /*
   1000    1.1       cgd  * IP timer processing;
   1001    1.1       cgd  * if a timer expires on a reassembly
   1002    1.1       cgd  * queue, discard it.
   1003    1.1       cgd  */
   1004    1.8   mycroft void
   1005    1.1       cgd ip_slowtimo()
   1006    1.1       cgd {
   1007  1.109  augustss 	struct ipq *fp, *nfp;
   1008   1.24   mycroft 	int s = splsoftnet();
   1009    1.1       cgd 
   1010   1.75   thorpej 	IPQ_LOCK();
   1011   1.25       cgd 	for (fp = ipq.lh_first; fp != NULL; fp = nfp) {
   1012   1.25       cgd 		nfp = fp->ipq_q.le_next;
   1013   1.25       cgd 		if (--fp->ipq_ttl == 0) {
   1014    1.1       cgd 			ipstat.ips_fragtimeout++;
   1015   1.25       cgd 			ip_freef(fp);
   1016    1.1       cgd 		}
   1017    1.1       cgd 	}
   1018  1.131    itojun 	/*
   1019  1.131    itojun 	 * If we are over the maximum number of fragments
   1020  1.131    itojun 	 * (due to the limit being lowered), drain off
   1021  1.131    itojun 	 * enough to get down to the new limit.
   1022  1.131    itojun 	 */
   1023  1.131    itojun 	if (ip_maxfragpackets < 0)
   1024  1.131    itojun 		;
   1025  1.131    itojun 	else {
   1026  1.131    itojun 		while (ip_nfragpackets > ip_maxfragpackets && ipq.lh_first)
   1027  1.131    itojun 			ip_freef(ipq.lh_first);
   1028  1.131    itojun 	}
   1029   1.75   thorpej 	IPQ_UNLOCK();
   1030   1.63      matt #ifdef GATEWAY
   1031   1.63      matt 	ipflow_slowtimo();
   1032   1.63      matt #endif
   1033    1.1       cgd 	splx(s);
   1034    1.1       cgd }
   1035    1.1       cgd 
   1036    1.1       cgd /*
   1037    1.1       cgd  * Drain off all datagram fragments.
   1038    1.1       cgd  */
   1039    1.8   mycroft void
   1040    1.1       cgd ip_drain()
   1041    1.1       cgd {
   1042    1.1       cgd 
   1043   1.75   thorpej 	/*
   1044   1.75   thorpej 	 * We may be called from a device's interrupt context.  If
   1045   1.75   thorpej 	 * the ipq is already busy, just bail out now.
   1046   1.75   thorpej 	 */
   1047   1.75   thorpej 	if (ipq_lock_try() == 0)
   1048   1.75   thorpej 		return;
   1049   1.75   thorpej 
   1050   1.25       cgd 	while (ipq.lh_first != NULL) {
   1051    1.1       cgd 		ipstat.ips_fragdropped++;
   1052   1.25       cgd 		ip_freef(ipq.lh_first);
   1053    1.1       cgd 	}
   1054   1.75   thorpej 
   1055   1.75   thorpej 	IPQ_UNLOCK();
   1056    1.1       cgd }
   1057    1.1       cgd 
   1058    1.1       cgd /*
   1059    1.1       cgd  * Do option processing on a datagram,
   1060    1.1       cgd  * possibly discarding it if bad options are encountered,
   1061    1.1       cgd  * or forwarding it if source-routed.
   1062    1.1       cgd  * Returns 1 if packet has been forwarded/freed,
   1063    1.1       cgd  * 0 if the packet should be processed further.
   1064    1.1       cgd  */
   1065    1.8   mycroft int
   1066    1.1       cgd ip_dooptions(m)
   1067    1.1       cgd 	struct mbuf *m;
   1068    1.1       cgd {
   1069  1.109  augustss 	struct ip *ip = mtod(m, struct ip *);
   1070  1.109  augustss 	u_char *cp, *cp0;
   1071  1.109  augustss 	struct ip_timestamp *ipt;
   1072  1.109  augustss 	struct in_ifaddr *ia;
   1073    1.1       cgd 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
   1074  1.104   thorpej 	struct in_addr dst;
   1075    1.1       cgd 	n_time ntime;
   1076    1.1       cgd 
   1077   1.13   mycroft 	dst = ip->ip_dst;
   1078    1.1       cgd 	cp = (u_char *)(ip + 1);
   1079    1.1       cgd 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
   1080    1.1       cgd 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
   1081    1.1       cgd 		opt = cp[IPOPT_OPTVAL];
   1082    1.1       cgd 		if (opt == IPOPT_EOL)
   1083    1.1       cgd 			break;
   1084    1.1       cgd 		if (opt == IPOPT_NOP)
   1085    1.1       cgd 			optlen = 1;
   1086    1.1       cgd 		else {
   1087  1.113    itojun 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
   1088  1.113    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1089  1.113    itojun 				goto bad;
   1090  1.113    itojun 			}
   1091    1.1       cgd 			optlen = cp[IPOPT_OLEN];
   1092  1.114    itojun 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
   1093    1.1       cgd 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1094    1.1       cgd 				goto bad;
   1095    1.1       cgd 			}
   1096    1.1       cgd 		}
   1097    1.1       cgd 		switch (opt) {
   1098    1.1       cgd 
   1099    1.1       cgd 		default:
   1100    1.1       cgd 			break;
   1101    1.1       cgd 
   1102    1.1       cgd 		/*
   1103    1.1       cgd 		 * Source routing with record.
   1104    1.1       cgd 		 * Find interface with current destination address.
   1105    1.1       cgd 		 * If none on this machine then drop if strictly routed,
   1106    1.1       cgd 		 * or do nothing if loosely routed.
   1107    1.1       cgd 		 * Record interface address and bring up next address
   1108    1.1       cgd 		 * component.  If strictly routed make sure next
   1109    1.1       cgd 		 * address is on directly accessible net.
   1110    1.1       cgd 		 */
   1111    1.1       cgd 		case IPOPT_LSRR:
   1112    1.1       cgd 		case IPOPT_SSRR:
   1113   1.47       cjs 			if (ip_allowsrcrt == 0) {
   1114   1.47       cjs 				type = ICMP_UNREACH;
   1115   1.47       cjs 				code = ICMP_UNREACH_NET_PROHIB;
   1116   1.47       cjs 				goto bad;
   1117   1.47       cjs 			}
   1118  1.114    itojun 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1119  1.114    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1120  1.114    itojun 				goto bad;
   1121  1.114    itojun 			}
   1122    1.1       cgd 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1123    1.1       cgd 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1124    1.1       cgd 				goto bad;
   1125    1.1       cgd 			}
   1126    1.1       cgd 			ipaddr.sin_addr = ip->ip_dst;
   1127   1.19   mycroft 			ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
   1128    1.1       cgd 			if (ia == 0) {
   1129    1.1       cgd 				if (opt == IPOPT_SSRR) {
   1130    1.1       cgd 					type = ICMP_UNREACH;
   1131    1.1       cgd 					code = ICMP_UNREACH_SRCFAIL;
   1132    1.1       cgd 					goto bad;
   1133    1.1       cgd 				}
   1134    1.1       cgd 				/*
   1135    1.1       cgd 				 * Loose routing, and not at next destination
   1136    1.1       cgd 				 * yet; nothing to do except forward.
   1137    1.1       cgd 				 */
   1138    1.1       cgd 				break;
   1139    1.1       cgd 			}
   1140    1.1       cgd 			off--;			/* 0 origin */
   1141  1.112  sommerfe 			if ((off + sizeof(struct in_addr)) > optlen) {
   1142    1.1       cgd 				/*
   1143    1.1       cgd 				 * End of source route.  Should be for us.
   1144    1.1       cgd 				 */
   1145    1.1       cgd 				save_rte(cp, ip->ip_src);
   1146    1.1       cgd 				break;
   1147    1.1       cgd 			}
   1148    1.1       cgd 			/*
   1149    1.1       cgd 			 * locate outgoing interface
   1150    1.1       cgd 			 */
   1151    1.1       cgd 			bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
   1152    1.1       cgd 			    sizeof(ipaddr.sin_addr));
   1153   1.96   thorpej 			if (opt == IPOPT_SSRR)
   1154   1.96   thorpej 				ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
   1155   1.96   thorpej 			else
   1156    1.1       cgd 				ia = ip_rtaddr(ipaddr.sin_addr);
   1157    1.1       cgd 			if (ia == 0) {
   1158    1.1       cgd 				type = ICMP_UNREACH;
   1159    1.1       cgd 				code = ICMP_UNREACH_SRCFAIL;
   1160    1.1       cgd 				goto bad;
   1161    1.1       cgd 			}
   1162    1.1       cgd 			ip->ip_dst = ipaddr.sin_addr;
   1163   1.20   mycroft 			bcopy((caddr_t)&ia->ia_addr.sin_addr,
   1164    1.1       cgd 			    (caddr_t)(cp + off), sizeof(struct in_addr));
   1165    1.1       cgd 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1166   1.13   mycroft 			/*
   1167   1.13   mycroft 			 * Let ip_intr's mcast routing check handle mcast pkts
   1168   1.13   mycroft 			 */
   1169   1.18   mycroft 			forward = !IN_MULTICAST(ip->ip_dst.s_addr);
   1170    1.1       cgd 			break;
   1171    1.1       cgd 
   1172    1.1       cgd 		case IPOPT_RR:
   1173  1.114    itojun 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1174  1.114    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1175  1.114    itojun 				goto bad;
   1176  1.114    itojun 			}
   1177    1.1       cgd 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1178    1.1       cgd 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1179    1.1       cgd 				goto bad;
   1180    1.1       cgd 			}
   1181    1.1       cgd 			/*
   1182    1.1       cgd 			 * If no space remains, ignore.
   1183    1.1       cgd 			 */
   1184    1.1       cgd 			off--;			/* 0 origin */
   1185  1.112  sommerfe 			if ((off + sizeof(struct in_addr)) > optlen)
   1186    1.1       cgd 				break;
   1187    1.1       cgd 			bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
   1188    1.1       cgd 			    sizeof(ipaddr.sin_addr));
   1189    1.1       cgd 			/*
   1190    1.1       cgd 			 * locate outgoing interface; if we're the destination,
   1191    1.1       cgd 			 * use the incoming interface (should be same).
   1192    1.1       cgd 			 */
   1193   1.96   thorpej 			if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
   1194   1.96   thorpej 			    == NULL &&
   1195   1.96   thorpej 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
   1196    1.1       cgd 				type = ICMP_UNREACH;
   1197    1.1       cgd 				code = ICMP_UNREACH_HOST;
   1198    1.1       cgd 				goto bad;
   1199    1.1       cgd 			}
   1200   1.20   mycroft 			bcopy((caddr_t)&ia->ia_addr.sin_addr,
   1201    1.1       cgd 			    (caddr_t)(cp + off), sizeof(struct in_addr));
   1202    1.1       cgd 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1203    1.1       cgd 			break;
   1204    1.1       cgd 
   1205    1.1       cgd 		case IPOPT_TS:
   1206    1.1       cgd 			code = cp - (u_char *)ip;
   1207    1.1       cgd 			ipt = (struct ip_timestamp *)cp;
   1208  1.114    itojun 			if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
   1209  1.114    itojun 				code = (u_char *)&ipt->ipt_len - (u_char *)ip;
   1210    1.1       cgd 				goto bad;
   1211  1.114    itojun 			}
   1212  1.114    itojun 			if (ipt->ipt_ptr < 5) {
   1213  1.114    itojun 				code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
   1214  1.114    itojun 				goto bad;
   1215  1.114    itojun 			}
   1216   1.15       cgd 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
   1217  1.114    itojun 				if (++ipt->ipt_oflw == 0) {
   1218  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1219  1.114    itojun 					    (u_char *)ip;
   1220    1.1       cgd 					goto bad;
   1221  1.114    itojun 				}
   1222    1.1       cgd 				break;
   1223    1.1       cgd 			}
   1224  1.104   thorpej 			cp0 = (cp + ipt->ipt_ptr - 1);
   1225    1.1       cgd 			switch (ipt->ipt_flg) {
   1226    1.1       cgd 
   1227    1.1       cgd 			case IPOPT_TS_TSONLY:
   1228    1.1       cgd 				break;
   1229    1.1       cgd 
   1230    1.1       cgd 			case IPOPT_TS_TSANDADDR:
   1231   1.66   thorpej 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1232  1.114    itojun 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1233  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1234  1.114    itojun 					    (u_char *)ip;
   1235    1.1       cgd 					goto bad;
   1236  1.114    itojun 				}
   1237   1.13   mycroft 				ipaddr.sin_addr = dst;
   1238   1.96   thorpej 				ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
   1239   1.96   thorpej 				    m->m_pkthdr.rcvif));
   1240   1.13   mycroft 				if (ia == 0)
   1241   1.13   mycroft 					continue;
   1242  1.104   thorpej 				bcopy(&ia->ia_addr.sin_addr,
   1243  1.104   thorpej 				    cp0, sizeof(struct in_addr));
   1244    1.1       cgd 				ipt->ipt_ptr += sizeof(struct in_addr);
   1245    1.1       cgd 				break;
   1246    1.1       cgd 
   1247    1.1       cgd 			case IPOPT_TS_PRESPEC:
   1248   1.66   thorpej 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1249  1.114    itojun 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1250  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1251  1.114    itojun 					    (u_char *)ip;
   1252    1.1       cgd 					goto bad;
   1253  1.114    itojun 				}
   1254  1.104   thorpej 				bcopy(cp0, &ipaddr.sin_addr,
   1255    1.1       cgd 				    sizeof(struct in_addr));
   1256   1.96   thorpej 				if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
   1257   1.96   thorpej 				    == NULL)
   1258    1.1       cgd 					continue;
   1259    1.1       cgd 				ipt->ipt_ptr += sizeof(struct in_addr);
   1260    1.1       cgd 				break;
   1261    1.1       cgd 
   1262    1.1       cgd 			default:
   1263  1.114    itojun 				/* XXX can't take &ipt->ipt_flg */
   1264  1.114    itojun 				code = (u_char *)&ipt->ipt_ptr -
   1265  1.114    itojun 				    (u_char *)ip + 1;
   1266    1.1       cgd 				goto bad;
   1267    1.1       cgd 			}
   1268    1.1       cgd 			ntime = iptime();
   1269  1.107   thorpej 			cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
   1270  1.107   thorpej 			bcopy(cp0, (caddr_t)cp + ipt->ipt_ptr - 1,
   1271    1.1       cgd 			    sizeof(n_time));
   1272    1.1       cgd 			ipt->ipt_ptr += sizeof(n_time);
   1273    1.1       cgd 		}
   1274    1.1       cgd 	}
   1275    1.1       cgd 	if (forward) {
   1276   1.26   thorpej 		if (ip_forwsrcrt == 0) {
   1277   1.26   thorpej 			type = ICMP_UNREACH;
   1278   1.26   thorpej 			code = ICMP_UNREACH_SRCFAIL;
   1279   1.26   thorpej 			goto bad;
   1280   1.26   thorpej 		}
   1281    1.1       cgd 		ip_forward(m, 1);
   1282    1.1       cgd 		return (1);
   1283   1.13   mycroft 	}
   1284   1.13   mycroft 	return (0);
   1285    1.1       cgd bad:
   1286   1.13   mycroft 	icmp_error(m, type, code, 0, 0);
   1287   1.13   mycroft 	ipstat.ips_badoptions++;
   1288    1.1       cgd 	return (1);
   1289    1.1       cgd }
   1290    1.1       cgd 
   1291    1.1       cgd /*
   1292    1.1       cgd  * Given address of next destination (final or next hop),
   1293    1.1       cgd  * return internet address info of interface to be used to get there.
   1294    1.1       cgd  */
   1295    1.1       cgd struct in_ifaddr *
   1296    1.1       cgd ip_rtaddr(dst)
   1297    1.1       cgd 	 struct in_addr dst;
   1298    1.1       cgd {
   1299  1.109  augustss 	struct sockaddr_in *sin;
   1300    1.1       cgd 
   1301   1.19   mycroft 	sin = satosin(&ipforward_rt.ro_dst);
   1302    1.1       cgd 
   1303   1.35   mycroft 	if (ipforward_rt.ro_rt == 0 || !in_hosteq(dst, sin->sin_addr)) {
   1304    1.1       cgd 		if (ipforward_rt.ro_rt) {
   1305    1.1       cgd 			RTFREE(ipforward_rt.ro_rt);
   1306    1.1       cgd 			ipforward_rt.ro_rt = 0;
   1307    1.1       cgd 		}
   1308    1.1       cgd 		sin->sin_family = AF_INET;
   1309    1.1       cgd 		sin->sin_len = sizeof(*sin);
   1310    1.1       cgd 		sin->sin_addr = dst;
   1311    1.1       cgd 
   1312    1.1       cgd 		rtalloc(&ipforward_rt);
   1313    1.1       cgd 	}
   1314    1.1       cgd 	if (ipforward_rt.ro_rt == 0)
   1315    1.1       cgd 		return ((struct in_ifaddr *)0);
   1316   1.19   mycroft 	return (ifatoia(ipforward_rt.ro_rt->rt_ifa));
   1317    1.1       cgd }
   1318    1.1       cgd 
   1319    1.1       cgd /*
   1320    1.1       cgd  * Save incoming source route for use in replies,
   1321    1.1       cgd  * to be picked up later by ip_srcroute if the receiver is interested.
   1322    1.1       cgd  */
   1323   1.13   mycroft void
   1324    1.1       cgd save_rte(option, dst)
   1325    1.1       cgd 	u_char *option;
   1326    1.1       cgd 	struct in_addr dst;
   1327    1.1       cgd {
   1328    1.1       cgd 	unsigned olen;
   1329    1.1       cgd 
   1330    1.1       cgd 	olen = option[IPOPT_OLEN];
   1331    1.1       cgd #ifdef DIAGNOSTIC
   1332    1.1       cgd 	if (ipprintfs)
   1333   1.39  christos 		printf("save_rte: olen %d\n", olen);
   1334   1.89    itojun #endif /* 0 */
   1335    1.1       cgd 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
   1336    1.1       cgd 		return;
   1337    1.1       cgd 	bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
   1338    1.1       cgd 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
   1339    1.1       cgd 	ip_srcrt.dst = dst;
   1340    1.1       cgd }
   1341    1.1       cgd 
   1342    1.1       cgd /*
   1343    1.1       cgd  * Retrieve incoming source route for use in replies,
   1344    1.1       cgd  * in the same form used by setsockopt.
   1345    1.1       cgd  * The first hop is placed before the options, will be removed later.
   1346    1.1       cgd  */
   1347    1.1       cgd struct mbuf *
   1348    1.1       cgd ip_srcroute()
   1349    1.1       cgd {
   1350  1.109  augustss 	struct in_addr *p, *q;
   1351  1.109  augustss 	struct mbuf *m;
   1352    1.1       cgd 
   1353    1.1       cgd 	if (ip_nhops == 0)
   1354    1.1       cgd 		return ((struct mbuf *)0);
   1355    1.1       cgd 	m = m_get(M_DONTWAIT, MT_SOOPTS);
   1356    1.1       cgd 	if (m == 0)
   1357    1.1       cgd 		return ((struct mbuf *)0);
   1358    1.1       cgd 
   1359   1.13   mycroft #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
   1360    1.1       cgd 
   1361    1.1       cgd 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
   1362    1.1       cgd 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
   1363    1.1       cgd 	    OPTSIZ;
   1364    1.1       cgd #ifdef DIAGNOSTIC
   1365    1.1       cgd 	if (ipprintfs)
   1366   1.39  christos 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
   1367    1.1       cgd #endif
   1368    1.1       cgd 
   1369    1.1       cgd 	/*
   1370    1.1       cgd 	 * First save first hop for return route
   1371    1.1       cgd 	 */
   1372    1.1       cgd 	p = &ip_srcrt.route[ip_nhops - 1];
   1373    1.1       cgd 	*(mtod(m, struct in_addr *)) = *p--;
   1374    1.1       cgd #ifdef DIAGNOSTIC
   1375    1.1       cgd 	if (ipprintfs)
   1376   1.39  christos 		printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
   1377    1.1       cgd #endif
   1378    1.1       cgd 
   1379    1.1       cgd 	/*
   1380    1.1       cgd 	 * Copy option fields and padding (nop) to mbuf.
   1381    1.1       cgd 	 */
   1382    1.1       cgd 	ip_srcrt.nop = IPOPT_NOP;
   1383    1.1       cgd 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
   1384    1.1       cgd 	bcopy((caddr_t)&ip_srcrt.nop,
   1385    1.1       cgd 	    mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
   1386    1.1       cgd 	q = (struct in_addr *)(mtod(m, caddr_t) +
   1387    1.1       cgd 	    sizeof(struct in_addr) + OPTSIZ);
   1388    1.1       cgd #undef OPTSIZ
   1389    1.1       cgd 	/*
   1390    1.1       cgd 	 * Record return path as an IP source route,
   1391    1.1       cgd 	 * reversing the path (pointers are now aligned).
   1392    1.1       cgd 	 */
   1393    1.1       cgd 	while (p >= ip_srcrt.route) {
   1394    1.1       cgd #ifdef DIAGNOSTIC
   1395    1.1       cgd 		if (ipprintfs)
   1396   1.39  christos 			printf(" %x", ntohl(q->s_addr));
   1397    1.1       cgd #endif
   1398    1.1       cgd 		*q++ = *p--;
   1399    1.1       cgd 	}
   1400    1.1       cgd 	/*
   1401    1.1       cgd 	 * Last hop goes to final destination.
   1402    1.1       cgd 	 */
   1403    1.1       cgd 	*q = ip_srcrt.dst;
   1404    1.1       cgd #ifdef DIAGNOSTIC
   1405    1.1       cgd 	if (ipprintfs)
   1406   1.39  christos 		printf(" %x\n", ntohl(q->s_addr));
   1407    1.1       cgd #endif
   1408    1.1       cgd 	return (m);
   1409    1.1       cgd }
   1410    1.1       cgd 
   1411    1.1       cgd /*
   1412    1.1       cgd  * Strip out IP options, at higher
   1413    1.1       cgd  * level protocol in the kernel.
   1414    1.1       cgd  * Second argument is buffer to which options
   1415    1.1       cgd  * will be moved, and return value is their length.
   1416    1.1       cgd  * XXX should be deleted; last arg currently ignored.
   1417    1.1       cgd  */
   1418    1.8   mycroft void
   1419    1.1       cgd ip_stripoptions(m, mopt)
   1420  1.109  augustss 	struct mbuf *m;
   1421    1.1       cgd 	struct mbuf *mopt;
   1422    1.1       cgd {
   1423  1.109  augustss 	int i;
   1424    1.1       cgd 	struct ip *ip = mtod(m, struct ip *);
   1425  1.109  augustss 	caddr_t opts;
   1426    1.1       cgd 	int olen;
   1427    1.1       cgd 
   1428   1.79   mycroft 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
   1429    1.1       cgd 	opts = (caddr_t)(ip + 1);
   1430    1.1       cgd 	i = m->m_len - (sizeof (struct ip) + olen);
   1431    1.1       cgd 	bcopy(opts  + olen, opts, (unsigned)i);
   1432    1.1       cgd 	m->m_len -= olen;
   1433    1.1       cgd 	if (m->m_flags & M_PKTHDR)
   1434    1.1       cgd 		m->m_pkthdr.len -= olen;
   1435   1.79   mycroft 	ip->ip_len -= olen;
   1436   1.79   mycroft 	ip->ip_hl = sizeof (struct ip) >> 2;
   1437    1.1       cgd }
   1438    1.1       cgd 
   1439  1.139      matt const int inetctlerrmap[PRC_NCMDS] = {
   1440    1.1       cgd 	0,		0,		0,		0,
   1441    1.1       cgd 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
   1442    1.1       cgd 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
   1443    1.1       cgd 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
   1444    1.1       cgd 	0,		0,		0,		0,
   1445    1.1       cgd 	ENOPROTOOPT
   1446    1.1       cgd };
   1447    1.1       cgd 
   1448    1.1       cgd /*
   1449    1.1       cgd  * Forward a packet.  If some error occurs return the sender
   1450    1.1       cgd  * an icmp packet.  Note we can't always generate a meaningful
   1451    1.1       cgd  * icmp message because icmp doesn't have a large enough repertoire
   1452    1.1       cgd  * of codes and types.
   1453    1.1       cgd  *
   1454    1.1       cgd  * If not forwarding, just drop the packet.  This could be confusing
   1455    1.1       cgd  * if ipforwarding was zero but some routing protocol was advancing
   1456    1.1       cgd  * us as a gateway to somewhere.  However, we must let the routing
   1457    1.1       cgd  * protocol deal with that.
   1458    1.1       cgd  *
   1459    1.1       cgd  * The srcrt parameter indicates whether the packet is being forwarded
   1460    1.1       cgd  * via a source route.
   1461    1.1       cgd  */
   1462   1.13   mycroft void
   1463    1.1       cgd ip_forward(m, srcrt)
   1464    1.1       cgd 	struct mbuf *m;
   1465    1.1       cgd 	int srcrt;
   1466    1.1       cgd {
   1467  1.109  augustss 	struct ip *ip = mtod(m, struct ip *);
   1468  1.109  augustss 	struct sockaddr_in *sin;
   1469  1.109  augustss 	struct rtentry *rt;
   1470   1.28  christos 	int error, type = 0, code = 0;
   1471    1.1       cgd 	struct mbuf *mcopy;
   1472   1.13   mycroft 	n_long dest;
   1473   1.13   mycroft 	struct ifnet *destifp;
   1474   1.89    itojun #ifdef IPSEC
   1475   1.89    itojun 	struct ifnet dummyifp;
   1476   1.89    itojun #endif
   1477  1.135   thorpej 
   1478  1.135   thorpej 	/*
   1479  1.135   thorpej 	 * Clear any in-bound checksum flags for this packet.
   1480  1.135   thorpej 	 */
   1481  1.135   thorpej 	m->m_pkthdr.csum_flags = 0;
   1482    1.1       cgd 
   1483   1.13   mycroft 	dest = 0;
   1484    1.1       cgd #ifdef DIAGNOSTIC
   1485    1.1       cgd 	if (ipprintfs)
   1486   1.70   thorpej 		printf("forward: src %2.2x dst %2.2x ttl %x\n",
   1487   1.70   thorpej 		    ntohl(ip->ip_src.s_addr),
   1488   1.70   thorpej 		    ntohl(ip->ip_dst.s_addr), ip->ip_ttl);
   1489    1.1       cgd #endif
   1490   1.93  sommerfe 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
   1491    1.1       cgd 		ipstat.ips_cantforward++;
   1492    1.1       cgd 		m_freem(m);
   1493    1.1       cgd 		return;
   1494    1.1       cgd 	}
   1495    1.1       cgd 	if (ip->ip_ttl <= IPTTLDEC) {
   1496   1.13   mycroft 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
   1497    1.1       cgd 		return;
   1498    1.1       cgd 	}
   1499    1.1       cgd 	ip->ip_ttl -= IPTTLDEC;
   1500    1.1       cgd 
   1501   1.19   mycroft 	sin = satosin(&ipforward_rt.ro_dst);
   1502    1.1       cgd 	if ((rt = ipforward_rt.ro_rt) == 0 ||
   1503   1.35   mycroft 	    !in_hosteq(ip->ip_dst, sin->sin_addr)) {
   1504    1.1       cgd 		if (ipforward_rt.ro_rt) {
   1505    1.1       cgd 			RTFREE(ipforward_rt.ro_rt);
   1506    1.1       cgd 			ipforward_rt.ro_rt = 0;
   1507    1.1       cgd 		}
   1508    1.1       cgd 		sin->sin_family = AF_INET;
   1509   1.35   mycroft 		sin->sin_len = sizeof(struct sockaddr_in);
   1510    1.1       cgd 		sin->sin_addr = ip->ip_dst;
   1511    1.1       cgd 
   1512    1.1       cgd 		rtalloc(&ipforward_rt);
   1513    1.1       cgd 		if (ipforward_rt.ro_rt == 0) {
   1514   1.13   mycroft 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
   1515    1.1       cgd 			return;
   1516    1.1       cgd 		}
   1517    1.1       cgd 		rt = ipforward_rt.ro_rt;
   1518    1.1       cgd 	}
   1519    1.1       cgd 
   1520    1.1       cgd 	/*
   1521   1.34   mycroft 	 * Save at most 68 bytes of the packet in case
   1522    1.1       cgd 	 * we need to generate an ICMP message to the src.
   1523  1.119    itojun 	 * Pullup to avoid sharing mbuf cluster between m and mcopy.
   1524    1.1       cgd 	 */
   1525  1.119    itojun 	mcopy = m_copym(m, 0, imin((int)ip->ip_len, 68), M_DONTWAIT);
   1526  1.119    itojun 	if (mcopy)
   1527  1.119    itojun 		mcopy = m_pullup(mcopy, ip->ip_hl << 2);
   1528    1.1       cgd 
   1529    1.1       cgd 	/*
   1530    1.1       cgd 	 * If forwarding packet using same interface that it came in on,
   1531    1.1       cgd 	 * perhaps should send a redirect to sender to shortcut a hop.
   1532    1.1       cgd 	 * Only send redirect if source is sending directly to us,
   1533    1.1       cgd 	 * and if packet was not source routed (or has any options).
   1534    1.1       cgd 	 * Also, don't send redirect if forwarding using a default route
   1535    1.1       cgd 	 * or a route modified by a redirect.
   1536    1.1       cgd 	 */
   1537    1.1       cgd 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
   1538    1.1       cgd 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
   1539   1.35   mycroft 	    !in_nullhost(satosin(rt_key(rt))->sin_addr) &&
   1540    1.1       cgd 	    ipsendredirects && !srcrt) {
   1541   1.19   mycroft 		if (rt->rt_ifa &&
   1542   1.19   mycroft 		    (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
   1543   1.19   mycroft 		    ifatoia(rt->rt_ifa)->ia_subnet) {
   1544   1.77   thorpej 			if (rt->rt_flags & RTF_GATEWAY)
   1545   1.77   thorpej 				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
   1546   1.77   thorpej 			else
   1547   1.77   thorpej 				dest = ip->ip_dst.s_addr;
   1548   1.77   thorpej 			/*
   1549   1.77   thorpej 			 * Router requirements says to only send host
   1550   1.77   thorpej 			 * redirects.
   1551   1.77   thorpej 			 */
   1552   1.77   thorpej 			type = ICMP_REDIRECT;
   1553   1.77   thorpej 			code = ICMP_REDIRECT_HOST;
   1554    1.1       cgd #ifdef DIAGNOSTIC
   1555   1.77   thorpej 			if (ipprintfs)
   1556   1.77   thorpej 				printf("redirect (%d) to %x\n", code,
   1557   1.77   thorpej 				    (u_int32_t)dest);
   1558    1.1       cgd #endif
   1559    1.1       cgd 		}
   1560    1.1       cgd 	}
   1561    1.1       cgd 
   1562   1.89    itojun #ifdef IPSEC
   1563  1.134     lukem 	/* Don't lookup socket in forwarding case */
   1564  1.127    itojun 	(void)ipsec_setsocket(m, NULL);
   1565  1.103    itojun #endif
   1566   1.27   thorpej 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
   1567   1.27   thorpej 	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), 0);
   1568    1.1       cgd 	if (error)
   1569    1.1       cgd 		ipstat.ips_cantforward++;
   1570    1.1       cgd 	else {
   1571    1.1       cgd 		ipstat.ips_forward++;
   1572    1.1       cgd 		if (type)
   1573    1.1       cgd 			ipstat.ips_redirectsent++;
   1574    1.1       cgd 		else {
   1575   1.63      matt 			if (mcopy) {
   1576   1.63      matt #ifdef GATEWAY
   1577   1.64   thorpej 				if (mcopy->m_flags & M_CANFASTFWD)
   1578   1.64   thorpej 					ipflow_create(&ipforward_rt, mcopy);
   1579   1.63      matt #endif
   1580    1.1       cgd 				m_freem(mcopy);
   1581   1.63      matt 			}
   1582    1.1       cgd 			return;
   1583    1.1       cgd 		}
   1584    1.1       cgd 	}
   1585    1.1       cgd 	if (mcopy == NULL)
   1586    1.1       cgd 		return;
   1587   1.13   mycroft 	destifp = NULL;
   1588   1.13   mycroft 
   1589    1.1       cgd 	switch (error) {
   1590    1.1       cgd 
   1591    1.1       cgd 	case 0:				/* forwarded, but need redirect */
   1592    1.1       cgd 		/* type, code set above */
   1593    1.1       cgd 		break;
   1594    1.1       cgd 
   1595    1.1       cgd 	case ENETUNREACH:		/* shouldn't happen, checked above */
   1596    1.1       cgd 	case EHOSTUNREACH:
   1597    1.1       cgd 	case ENETDOWN:
   1598    1.1       cgd 	case EHOSTDOWN:
   1599    1.1       cgd 	default:
   1600    1.1       cgd 		type = ICMP_UNREACH;
   1601    1.1       cgd 		code = ICMP_UNREACH_HOST;
   1602    1.1       cgd 		break;
   1603    1.1       cgd 
   1604    1.1       cgd 	case EMSGSIZE:
   1605    1.1       cgd 		type = ICMP_UNREACH;
   1606    1.1       cgd 		code = ICMP_UNREACH_NEEDFRAG;
   1607   1.89    itojun #ifndef IPSEC
   1608   1.13   mycroft 		if (ipforward_rt.ro_rt)
   1609   1.13   mycroft 			destifp = ipforward_rt.ro_rt->rt_ifp;
   1610   1.89    itojun #else
   1611   1.89    itojun 		/*
   1612   1.89    itojun 		 * If the packet is routed over IPsec tunnel, tell the
   1613   1.89    itojun 		 * originator the tunnel MTU.
   1614   1.89    itojun 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
   1615   1.89    itojun 		 * XXX quickhack!!!
   1616   1.89    itojun 		 */
   1617   1.89    itojun 		if (ipforward_rt.ro_rt) {
   1618   1.89    itojun 			struct secpolicy *sp;
   1619   1.89    itojun 			int ipsecerror;
   1620   1.95    itojun 			size_t ipsechdr;
   1621   1.89    itojun 			struct route *ro;
   1622   1.89    itojun 
   1623   1.89    itojun 			sp = ipsec4_getpolicybyaddr(mcopy,
   1624   1.95    itojun 			                            IPSEC_DIR_OUTBOUND,
   1625   1.95    itojun 			                            IP_FORWARDING,
   1626   1.95    itojun 			                            &ipsecerror);
   1627   1.89    itojun 
   1628   1.89    itojun 			if (sp == NULL)
   1629   1.89    itojun 				destifp = ipforward_rt.ro_rt->rt_ifp;
   1630   1.89    itojun 			else {
   1631   1.89    itojun 				/* count IPsec header size */
   1632   1.95    itojun 				ipsechdr = ipsec4_hdrsiz(mcopy,
   1633   1.95    itojun 				                         IPSEC_DIR_OUTBOUND,
   1634   1.95    itojun 				                         NULL);
   1635   1.89    itojun 
   1636   1.89    itojun 				/*
   1637   1.89    itojun 				 * find the correct route for outer IPv4
   1638   1.89    itojun 				 * header, compute tunnel MTU.
   1639   1.89    itojun 				 *
   1640   1.89    itojun 				 * XXX BUG ALERT
   1641   1.89    itojun 				 * The "dummyifp" code relies upon the fact
   1642   1.89    itojun 				 * that icmp_error() touches only ifp->if_mtu.
   1643   1.89    itojun 				 */
   1644   1.89    itojun 				/*XXX*/
   1645   1.89    itojun 				destifp = NULL;
   1646   1.89    itojun 				if (sp->req != NULL
   1647   1.95    itojun 				 && sp->req->sav != NULL
   1648   1.95    itojun 				 && sp->req->sav->sah != NULL) {
   1649   1.95    itojun 					ro = &sp->req->sav->sah->sa_route;
   1650   1.89    itojun 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
   1651   1.89    itojun 						dummyifp.if_mtu =
   1652   1.89    itojun 						    ro->ro_rt->rt_ifp->if_mtu;
   1653   1.89    itojun 						dummyifp.if_mtu -= ipsechdr;
   1654   1.89    itojun 						destifp = &dummyifp;
   1655   1.89    itojun 					}
   1656   1.89    itojun 				}
   1657   1.89    itojun 
   1658   1.89    itojun 				key_freesp(sp);
   1659   1.89    itojun 			}
   1660   1.89    itojun 		}
   1661   1.89    itojun #endif /*IPSEC*/
   1662    1.1       cgd 		ipstat.ips_cantfrag++;
   1663    1.1       cgd 		break;
   1664    1.1       cgd 
   1665    1.1       cgd 	case ENOBUFS:
   1666    1.1       cgd 		type = ICMP_SOURCEQUENCH;
   1667    1.1       cgd 		code = 0;
   1668    1.1       cgd 		break;
   1669    1.1       cgd 	}
   1670   1.13   mycroft 	icmp_error(mcopy, type, code, dest, destifp);
   1671   1.44   thorpej }
   1672   1.44   thorpej 
   1673   1.44   thorpej void
   1674   1.44   thorpej ip_savecontrol(inp, mp, ip, m)
   1675  1.109  augustss 	struct inpcb *inp;
   1676  1.109  augustss 	struct mbuf **mp;
   1677  1.109  augustss 	struct ip *ip;
   1678  1.109  augustss 	struct mbuf *m;
   1679   1.44   thorpej {
   1680   1.44   thorpej 
   1681   1.44   thorpej 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
   1682   1.44   thorpej 		struct timeval tv;
   1683   1.44   thorpej 
   1684   1.44   thorpej 		microtime(&tv);
   1685   1.44   thorpej 		*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
   1686   1.44   thorpej 		    SCM_TIMESTAMP, SOL_SOCKET);
   1687   1.44   thorpej 		if (*mp)
   1688   1.44   thorpej 			mp = &(*mp)->m_next;
   1689   1.44   thorpej 	}
   1690   1.44   thorpej 	if (inp->inp_flags & INP_RECVDSTADDR) {
   1691   1.44   thorpej 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
   1692   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
   1693   1.44   thorpej 		if (*mp)
   1694   1.44   thorpej 			mp = &(*mp)->m_next;
   1695   1.44   thorpej 	}
   1696   1.44   thorpej #ifdef notyet
   1697   1.44   thorpej 	/*
   1698   1.44   thorpej 	 * XXX
   1699   1.44   thorpej 	 * Moving these out of udp_input() made them even more broken
   1700   1.44   thorpej 	 * than they already were.
   1701   1.44   thorpej 	 *	- fenner (at) parc.xerox.com
   1702   1.44   thorpej 	 */
   1703   1.44   thorpej 	/* options were tossed already */
   1704   1.44   thorpej 	if (inp->inp_flags & INP_RECVOPTS) {
   1705   1.44   thorpej 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
   1706   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
   1707   1.44   thorpej 		if (*mp)
   1708   1.44   thorpej 			mp = &(*mp)->m_next;
   1709   1.44   thorpej 	}
   1710   1.44   thorpej 	/* ip_srcroute doesn't do what we want here, need to fix */
   1711   1.44   thorpej 	if (inp->inp_flags & INP_RECVRETOPTS) {
   1712   1.44   thorpej 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(),
   1713   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
   1714   1.44   thorpej 		if (*mp)
   1715   1.44   thorpej 			mp = &(*mp)->m_next;
   1716   1.44   thorpej 	}
   1717   1.44   thorpej #endif
   1718   1.44   thorpej 	if (inp->inp_flags & INP_RECVIF) {
   1719   1.44   thorpej 		struct sockaddr_dl sdl;
   1720   1.44   thorpej 
   1721   1.44   thorpej 		sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
   1722   1.44   thorpej 		sdl.sdl_family = AF_LINK;
   1723   1.44   thorpej 		sdl.sdl_index = m->m_pkthdr.rcvif ?
   1724   1.44   thorpej 		    m->m_pkthdr.rcvif->if_index : 0;
   1725   1.44   thorpej 		sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
   1726   1.44   thorpej 		*mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
   1727   1.44   thorpej 		    IP_RECVIF, IPPROTO_IP);
   1728   1.44   thorpej 		if (*mp)
   1729   1.44   thorpej 			mp = &(*mp)->m_next;
   1730   1.44   thorpej 	}
   1731   1.13   mycroft }
   1732   1.13   mycroft 
   1733   1.13   mycroft int
   1734   1.13   mycroft ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
   1735   1.13   mycroft 	int *name;
   1736   1.13   mycroft 	u_int namelen;
   1737   1.13   mycroft 	void *oldp;
   1738   1.13   mycroft 	size_t *oldlenp;
   1739   1.13   mycroft 	void *newp;
   1740   1.13   mycroft 	size_t newlen;
   1741   1.13   mycroft {
   1742   1.88  sommerfe 	extern int subnetsarelocal, hostzeroisbroadcast;
   1743   1.52   thorpej 
   1744   1.54     lukem 	int error, old;
   1745   1.54     lukem 
   1746   1.13   mycroft 	/* All sysctl names at this level are terminal. */
   1747   1.13   mycroft 	if (namelen != 1)
   1748   1.13   mycroft 		return (ENOTDIR);
   1749   1.13   mycroft 
   1750   1.13   mycroft 	switch (name[0]) {
   1751   1.13   mycroft 	case IPCTL_FORWARDING:
   1752   1.13   mycroft 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
   1753   1.13   mycroft 	case IPCTL_SENDREDIRECTS:
   1754   1.13   mycroft 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1755   1.13   mycroft 			&ipsendredirects));
   1756   1.13   mycroft 	case IPCTL_DEFTTL:
   1757   1.13   mycroft 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
   1758   1.13   mycroft #ifdef notyet
   1759   1.13   mycroft 	case IPCTL_DEFMTU:
   1760   1.13   mycroft 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
   1761   1.13   mycroft #endif
   1762   1.26   thorpej 	case IPCTL_FORWSRCRT:
   1763   1.47       cjs 		/* Don't allow this to change in a secure environment.  */
   1764   1.26   thorpej 		if (securelevel > 0)
   1765   1.46       cjs 			return (sysctl_rdint(oldp, oldlenp, newp,
   1766   1.46       cjs 			    ip_forwsrcrt));
   1767   1.46       cjs 		else
   1768   1.46       cjs 			return (sysctl_int(oldp, oldlenp, newp, newlen,
   1769   1.46       cjs 			    &ip_forwsrcrt));
   1770   1.27   thorpej 	case IPCTL_DIRECTEDBCAST:
   1771   1.27   thorpej 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1772   1.27   thorpej 		    &ip_directedbcast));
   1773   1.47       cjs 	case IPCTL_ALLOWSRCRT:
   1774   1.47       cjs 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1775   1.47       cjs 		    &ip_allowsrcrt));
   1776   1.52   thorpej 	case IPCTL_SUBNETSARELOCAL:
   1777   1.52   thorpej 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1778   1.52   thorpej 		    &subnetsarelocal));
   1779   1.53       kml 	case IPCTL_MTUDISC:
   1780   1.60       kml 		error = sysctl_int(oldp, oldlenp, newp, newlen,
   1781   1.60       kml 		    &ip_mtudisc);
   1782   1.60       kml 		if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) {
   1783   1.60       kml 			ip_mtudisc_timeout_q =
   1784   1.60       kml 			    rt_timer_queue_create(ip_mtudisc_timeout);
   1785   1.60       kml 		} else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
   1786   1.60       kml 			rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE);
   1787   1.60       kml 			ip_mtudisc_timeout_q = NULL;
   1788   1.60       kml 		}
   1789   1.60       kml 		return error;
   1790   1.54     lukem 	case IPCTL_ANONPORTMIN:
   1791   1.54     lukem 		old = anonportmin;
   1792   1.54     lukem 		error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmin);
   1793  1.118    itojun 		if (anonportmin >= anonportmax || anonportmin < 0
   1794  1.118    itojun 		    || anonportmin > 65535
   1795   1.54     lukem #ifndef IPNOPRIVPORTS
   1796   1.54     lukem 		    || anonportmin < IPPORT_RESERVED
   1797   1.54     lukem #endif
   1798   1.54     lukem 		    ) {
   1799   1.54     lukem 			anonportmin = old;
   1800   1.54     lukem 			return (EINVAL);
   1801   1.54     lukem 		}
   1802   1.54     lukem 		return (error);
   1803   1.54     lukem 	case IPCTL_ANONPORTMAX:
   1804   1.54     lukem 		old = anonportmax;
   1805   1.54     lukem 		error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmax);
   1806  1.118    itojun 		if (anonportmin >= anonportmax || anonportmax < 0
   1807  1.118    itojun 		    || anonportmax > 65535
   1808   1.54     lukem #ifndef IPNOPRIVPORTS
   1809   1.54     lukem 		    || anonportmax < IPPORT_RESERVED
   1810   1.54     lukem #endif
   1811   1.54     lukem 		    ) {
   1812   1.54     lukem 			anonportmax = old;
   1813   1.54     lukem 			return (EINVAL);
   1814   1.54     lukem 		}
   1815   1.60       kml 		return (error);
   1816   1.60       kml 	case IPCTL_MTUDISCTIMEOUT:
   1817   1.60       kml 		error = sysctl_int(oldp, oldlenp, newp, newlen,
   1818   1.60       kml 		   &ip_mtudisc_timeout);
   1819   1.60       kml 		if (ip_mtudisc_timeout_q != NULL)
   1820   1.60       kml 			rt_timer_queue_change(ip_mtudisc_timeout_q,
   1821   1.60       kml 					      ip_mtudisc_timeout);
   1822   1.54     lukem 		return (error);
   1823   1.65      matt #ifdef GATEWAY
   1824   1.65      matt 	case IPCTL_MAXFLOWS:
   1825   1.67   thorpej 	    {
   1826   1.67   thorpej 		int s;
   1827   1.67   thorpej 
   1828   1.65      matt 		error = sysctl_int(oldp, oldlenp, newp, newlen,
   1829   1.65      matt 		   &ip_maxflows);
   1830   1.67   thorpej 		s = splsoftnet();
   1831   1.65      matt 		ipflow_reap(0);
   1832   1.67   thorpej 		splx(s);
   1833   1.65      matt 		return (error);
   1834   1.67   thorpej 	    }
   1835   1.89    itojun #endif
   1836   1.90    itojun 	case IPCTL_HOSTZEROBROADCAST:
   1837   1.90    itojun 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1838   1.90    itojun 		    &hostzeroisbroadcast));
   1839   1.89    itojun #if NGIF > 0
   1840   1.89    itojun 	case IPCTL_GIF_TTL:
   1841   1.89    itojun 		return(sysctl_int(oldp, oldlenp, newp, newlen,
   1842   1.90    itojun 				  &ip_gif_ttl));
   1843  1.117      tron #endif
   1844  1.117      tron 
   1845  1.117      tron #ifndef IPNOPRIVPORTS
   1846  1.117      tron 	case IPCTL_LOWPORTMIN:
   1847  1.117      tron 		old = lowportmin;
   1848  1.117      tron 		error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmin);
   1849  1.117      tron 		if (lowportmin >= lowportmax
   1850  1.117      tron 		    || lowportmin > IPPORT_RESERVEDMAX
   1851  1.117      tron 		    || lowportmin < IPPORT_RESERVEDMIN
   1852  1.117      tron 		    ) {
   1853  1.117      tron 			lowportmin = old;
   1854  1.117      tron 			return (EINVAL);
   1855  1.117      tron 		}
   1856  1.117      tron 		return (error);
   1857  1.117      tron 	case IPCTL_LOWPORTMAX:
   1858  1.117      tron 		old = lowportmax;
   1859  1.117      tron 		error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmax);
   1860  1.117      tron 		if (lowportmin >= lowportmax
   1861  1.117      tron 		    || lowportmax > IPPORT_RESERVEDMAX
   1862  1.117      tron 		    || lowportmax < IPPORT_RESERVEDMIN
   1863  1.117      tron 		    ) {
   1864  1.117      tron 			lowportmax = old;
   1865  1.117      tron 			return (EINVAL);
   1866  1.117      tron 		}
   1867  1.117      tron 		return (error);
   1868   1.65      matt #endif
   1869  1.131    itojun 
   1870  1.131    itojun 	case IPCTL_MAXFRAGPACKETS:
   1871  1.131    itojun 		return (sysctl_int(oldp, oldlenp, newp, newlen,
   1872  1.131    itojun 		    &ip_maxfragpackets));
   1873   1.88  sommerfe 
   1874   1.13   mycroft 	default:
   1875   1.13   mycroft 		return (EOPNOTSUPP);
   1876   1.13   mycroft 	}
   1877   1.13   mycroft 	/* NOTREACHED */
   1878    1.1       cgd }
   1879