Home | History | Annotate | Line # | Download | only in netinet
ip_input.c revision 1.250.6.2
      1  1.250.6.2  dyoung /*	$NetBSD: ip_input.c,v 1.250.6.2 2007/07/19 20:48:56 dyoung Exp $	*/
      2  1.250.6.2  dyoung 
      3  1.250.6.2  dyoung /*
      4  1.250.6.2  dyoung  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5  1.250.6.2  dyoung  * All rights reserved.
      6  1.250.6.2  dyoung  *
      7  1.250.6.2  dyoung  * Redistribution and use in source and binary forms, with or without
      8  1.250.6.2  dyoung  * modification, are permitted provided that the following conditions
      9  1.250.6.2  dyoung  * are met:
     10  1.250.6.2  dyoung  * 1. Redistributions of source code must retain the above copyright
     11  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer.
     12  1.250.6.2  dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer in the
     14  1.250.6.2  dyoung  *    documentation and/or other materials provided with the distribution.
     15  1.250.6.2  dyoung  * 3. Neither the name of the project nor the names of its contributors
     16  1.250.6.2  dyoung  *    may be used to endorse or promote products derived from this software
     17  1.250.6.2  dyoung  *    without specific prior written permission.
     18  1.250.6.2  dyoung  *
     19  1.250.6.2  dyoung  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20  1.250.6.2  dyoung  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  1.250.6.2  dyoung  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  1.250.6.2  dyoung  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23  1.250.6.2  dyoung  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  1.250.6.2  dyoung  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  1.250.6.2  dyoung  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  1.250.6.2  dyoung  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  1.250.6.2  dyoung  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  1.250.6.2  dyoung  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  1.250.6.2  dyoung  * SUCH DAMAGE.
     30  1.250.6.2  dyoung  */
     31  1.250.6.2  dyoung 
     32  1.250.6.2  dyoung /*-
     33  1.250.6.2  dyoung  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34  1.250.6.2  dyoung  * All rights reserved.
     35  1.250.6.2  dyoung  *
     36  1.250.6.2  dyoung  * This code is derived from software contributed to The NetBSD Foundation
     37  1.250.6.2  dyoung  * by Public Access Networks Corporation ("Panix").  It was developed under
     38  1.250.6.2  dyoung  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39  1.250.6.2  dyoung  *
     40  1.250.6.2  dyoung  * Redistribution and use in source and binary forms, with or without
     41  1.250.6.2  dyoung  * modification, are permitted provided that the following conditions
     42  1.250.6.2  dyoung  * are met:
     43  1.250.6.2  dyoung  * 1. Redistributions of source code must retain the above copyright
     44  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer.
     45  1.250.6.2  dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     46  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer in the
     47  1.250.6.2  dyoung  *    documentation and/or other materials provided with the distribution.
     48  1.250.6.2  dyoung  * 3. All advertising materials mentioning features or use of this software
     49  1.250.6.2  dyoung  *    must display the following acknowledgement:
     50  1.250.6.2  dyoung  *	This product includes software developed by the NetBSD
     51  1.250.6.2  dyoung  *	Foundation, Inc. and its contributors.
     52  1.250.6.2  dyoung  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53  1.250.6.2  dyoung  *    contributors may be used to endorse or promote products derived
     54  1.250.6.2  dyoung  *    from this software without specific prior written permission.
     55  1.250.6.2  dyoung  *
     56  1.250.6.2  dyoung  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57  1.250.6.2  dyoung  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58  1.250.6.2  dyoung  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59  1.250.6.2  dyoung  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60  1.250.6.2  dyoung  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61  1.250.6.2  dyoung  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62  1.250.6.2  dyoung  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63  1.250.6.2  dyoung  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64  1.250.6.2  dyoung  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65  1.250.6.2  dyoung  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66  1.250.6.2  dyoung  * POSSIBILITY OF SUCH DAMAGE.
     67  1.250.6.2  dyoung  */
     68  1.250.6.2  dyoung 
     69  1.250.6.2  dyoung /*
     70  1.250.6.2  dyoung  * Copyright (c) 1982, 1986, 1988, 1993
     71  1.250.6.2  dyoung  *	The Regents of the University of California.  All rights reserved.
     72  1.250.6.2  dyoung  *
     73  1.250.6.2  dyoung  * Redistribution and use in source and binary forms, with or without
     74  1.250.6.2  dyoung  * modification, are permitted provided that the following conditions
     75  1.250.6.2  dyoung  * are met:
     76  1.250.6.2  dyoung  * 1. Redistributions of source code must retain the above copyright
     77  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer.
     78  1.250.6.2  dyoung  * 2. Redistributions in binary form must reproduce the above copyright
     79  1.250.6.2  dyoung  *    notice, this list of conditions and the following disclaimer in the
     80  1.250.6.2  dyoung  *    documentation and/or other materials provided with the distribution.
     81  1.250.6.2  dyoung  * 3. Neither the name of the University nor the names of its contributors
     82  1.250.6.2  dyoung  *    may be used to endorse or promote products derived from this software
     83  1.250.6.2  dyoung  *    without specific prior written permission.
     84  1.250.6.2  dyoung  *
     85  1.250.6.2  dyoung  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     86  1.250.6.2  dyoung  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     87  1.250.6.2  dyoung  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     88  1.250.6.2  dyoung  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     89  1.250.6.2  dyoung  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     90  1.250.6.2  dyoung  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     91  1.250.6.2  dyoung  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     92  1.250.6.2  dyoung  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     93  1.250.6.2  dyoung  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     94  1.250.6.2  dyoung  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     95  1.250.6.2  dyoung  * SUCH DAMAGE.
     96  1.250.6.2  dyoung  *
     97  1.250.6.2  dyoung  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
     98  1.250.6.2  dyoung  */
     99  1.250.6.2  dyoung 
    100  1.250.6.2  dyoung #include <sys/cdefs.h>
    101  1.250.6.2  dyoung __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.250.6.2 2007/07/19 20:48:56 dyoung Exp $");
    102  1.250.6.2  dyoung 
    103  1.250.6.2  dyoung #include "opt_inet.h"
    104  1.250.6.2  dyoung #include "opt_gateway.h"
    105  1.250.6.2  dyoung #include "opt_pfil_hooks.h"
    106  1.250.6.2  dyoung #include "opt_ipsec.h"
    107  1.250.6.2  dyoung #include "opt_mrouting.h"
    108  1.250.6.2  dyoung #include "opt_mbuftrace.h"
    109  1.250.6.2  dyoung #include "opt_inet_csum.h"
    110  1.250.6.2  dyoung 
    111  1.250.6.2  dyoung #include <sys/param.h>
    112  1.250.6.2  dyoung #include <sys/systm.h>
    113  1.250.6.2  dyoung #include <sys/malloc.h>
    114  1.250.6.2  dyoung #include <sys/mbuf.h>
    115  1.250.6.2  dyoung #include <sys/domain.h>
    116  1.250.6.2  dyoung #include <sys/protosw.h>
    117  1.250.6.2  dyoung #include <sys/socket.h>
    118  1.250.6.2  dyoung #include <sys/socketvar.h>
    119  1.250.6.2  dyoung #include <sys/errno.h>
    120  1.250.6.2  dyoung #include <sys/time.h>
    121  1.250.6.2  dyoung #include <sys/kernel.h>
    122  1.250.6.2  dyoung #include <sys/pool.h>
    123  1.250.6.2  dyoung #include <sys/sysctl.h>
    124  1.250.6.2  dyoung #include <sys/kauth.h>
    125  1.250.6.2  dyoung 
    126  1.250.6.2  dyoung #include <net/if.h>
    127  1.250.6.2  dyoung #include <net/if_dl.h>
    128  1.250.6.2  dyoung #include <net/route.h>
    129  1.250.6.2  dyoung #include <net/pfil.h>
    130  1.250.6.2  dyoung 
    131  1.250.6.2  dyoung #include <netinet/in.h>
    132  1.250.6.2  dyoung #include <netinet/in_systm.h>
    133  1.250.6.2  dyoung #include <netinet/ip.h>
    134  1.250.6.2  dyoung #include <netinet/in_pcb.h>
    135  1.250.6.2  dyoung #include <netinet/in_proto.h>
    136  1.250.6.2  dyoung #include <netinet/in_var.h>
    137  1.250.6.2  dyoung #include <netinet/ip_var.h>
    138  1.250.6.2  dyoung #include <netinet/ip_icmp.h>
    139  1.250.6.2  dyoung /* just for gif_ttl */
    140  1.250.6.2  dyoung #include <netinet/in_gif.h>
    141  1.250.6.2  dyoung #include "gif.h"
    142  1.250.6.2  dyoung #include <net/if_gre.h>
    143  1.250.6.2  dyoung #include "gre.h"
    144  1.250.6.2  dyoung 
    145  1.250.6.2  dyoung #ifdef MROUTING
    146  1.250.6.2  dyoung #include <netinet/ip_mroute.h>
    147  1.250.6.2  dyoung #endif
    148  1.250.6.2  dyoung 
    149  1.250.6.2  dyoung #ifdef IPSEC
    150  1.250.6.2  dyoung #include <netinet6/ipsec.h>
    151  1.250.6.2  dyoung #include <netkey/key.h>
    152  1.250.6.2  dyoung #endif
    153  1.250.6.2  dyoung #ifdef FAST_IPSEC
    154  1.250.6.2  dyoung #include <netipsec/ipsec.h>
    155  1.250.6.2  dyoung #include <netipsec/key.h>
    156  1.250.6.2  dyoung #endif	/* FAST_IPSEC*/
    157  1.250.6.2  dyoung 
    /*
     * Compile-time defaults for the IP tunables initialized below.  Each
     * macro is defined here only if it has not been defined already.
     */
    158  1.250.6.2  dyoung #ifndef	IPFORWARDING
    159  1.250.6.2  dyoung #ifdef GATEWAY
    160  1.250.6.2  dyoung #define	IPFORWARDING	1	/* forward IP packets not for us */
    161  1.250.6.2  dyoung #else /* GATEWAY */
    162  1.250.6.2  dyoung #define	IPFORWARDING	0	/* don't forward IP packets not for us */
    163  1.250.6.2  dyoung #endif /* GATEWAY */
    164  1.250.6.2  dyoung #endif /* IPFORWARDING */
    165  1.250.6.2  dyoung #ifndef	IPSENDREDIRECTS
    166  1.250.6.2  dyoung #define	IPSENDREDIRECTS	1
    167  1.250.6.2  dyoung #endif
    168  1.250.6.2  dyoung #ifndef IPFORWSRCRT
    169  1.250.6.2  dyoung #define	IPFORWSRCRT	1	/* forward source-routed packets */
    170  1.250.6.2  dyoung #endif
    171  1.250.6.2  dyoung #ifndef IPALLOWSRCRT
    172  1.250.6.2  dyoung #define	IPALLOWSRCRT	1	/* allow source-routed packets */
    173  1.250.6.2  dyoung #endif
    174  1.250.6.2  dyoung #ifndef IPMTUDISC
    175  1.250.6.2  dyoung #define IPMTUDISC	1
    176  1.250.6.2  dyoung #endif
    177  1.250.6.2  dyoung #ifndef IPMTUDISCTIMEOUT
    178  1.250.6.2  dyoung #define IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
    179  1.250.6.2  dyoung #endif
    180  1.250.6.2  dyoung 
    181  1.250.6.2  dyoung /*
    182  1.250.6.2  dyoung  * Note: DIRECTED_BROADCAST is handled this way so that previous
    183  1.250.6.2  dyoung  * configuration using this option will Just Work.
    184  1.250.6.2  dyoung  */
    185  1.250.6.2  dyoung #ifndef IPDIRECTEDBCAST
    186  1.250.6.2  dyoung #ifdef DIRECTED_BROADCAST
    187  1.250.6.2  dyoung #define IPDIRECTEDBCAST	1
    188  1.250.6.2  dyoung #else
    189  1.250.6.2  dyoung #define	IPDIRECTEDBCAST	0
    190  1.250.6.2  dyoung #endif /* DIRECTED_BROADCAST */
    191  1.250.6.2  dyoung #endif /* IPDIRECTEDBCAST */
    /*
     * Tunable variables, each initialized from the matching default above
     * (ip_defttl takes IPDEFTTL, which is not defined in this file section).
     */
    192  1.250.6.2  dyoung int	ipforwarding = IPFORWARDING;
    193  1.250.6.2  dyoung int	ipsendredirects = IPSENDREDIRECTS;
    194  1.250.6.2  dyoung int	ip_defttl = IPDEFTTL;
    195  1.250.6.2  dyoung int	ip_forwsrcrt = IPFORWSRCRT;
    196  1.250.6.2  dyoung int	ip_directedbcast = IPDIRECTEDBCAST;
    197  1.250.6.2  dyoung int	ip_allowsrcrt = IPALLOWSRCRT;
    198  1.250.6.2  dyoung int	ip_mtudisc = IPMTUDISC;
    199  1.250.6.2  dyoung int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
    /* ipprintfs exists only in DIAGNOSTIC kernels and starts out as 0. */
    200  1.250.6.2  dyoung #ifdef DIAGNOSTIC
    201  1.250.6.2  dyoung int	ipprintfs = 0;
    202  1.250.6.2  dyoung #endif
    203  1.250.6.2  dyoung 
    /* NOTE(review): presumably selects randomized IP IDs; not used in this section -- confirm. */
    204  1.250.6.2  dyoung int	ip_do_randomid = 0;
    205  1.250.6.2  dyoung 
    206  1.250.6.2  dyoung /*
    207  1.250.6.2  dyoung  * XXX - Setting ip_checkinterface mostly implements the receive side of
    208  1.250.6.2  dyoung  * the Strong ES model described in RFC 1122, but since the routing table
    209  1.250.6.2  dyoung  * and transmit implementation do not implement the Strong ES model,
    210  1.250.6.2  dyoung  * setting this to 1 results in an odd hybrid.
    211  1.250.6.2  dyoung  *
    212  1.250.6.2  dyoung  * XXX - ip_checkinterface currently must be disabled if you use ipnat
    213  1.250.6.2  dyoung  * to translate the destination address to another local interface.
    214  1.250.6.2  dyoung  *
    215  1.250.6.2  dyoung  * XXX - ip_checkinterface must be disabled if you add IP aliases
    216  1.250.6.2  dyoung  * to the loopback interface instead of the interface where the
    217  1.250.6.2  dyoung  * packets for those addresses are received.
    218  1.250.6.2  dyoung  */
    219  1.250.6.2  dyoung int	ip_checkinterface = 0;
    220  1.250.6.2  dyoung 
    221  1.250.6.2  dyoung 
    /* Route timer queue; created in ip_init() from ip_mtudisc_timeout. */
    222  1.250.6.2  dyoung struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
    223  1.250.6.2  dyoung 
    /* ipqmaxlen is copied into ipintrq.ifq_maxlen by ip_init(). */
    224  1.250.6.2  dyoung int	ipqmaxlen = IFQ_MAXLEN;
    /*
     * Interface-address list and the address/multicast hash tables.
     * ip_init() initializes the list head and allocates both tables with
     * hashinit(), which also fills in in_ifaddrhash and in_multihash.
     */
    225  1.250.6.2  dyoung u_long	in_ifaddrhash;				/* size of hash table - 1 */
    226  1.250.6.2  dyoung int	in_ifaddrentries;			/* total number of addrs */
    227  1.250.6.2  dyoung struct in_ifaddrhead in_ifaddrhead;
    228  1.250.6.2  dyoung struct	in_ifaddrhashhead *in_ifaddrhashtbl;
    229  1.250.6.2  dyoung u_long	in_multihash;				/* size of hash table - 1 */
    230  1.250.6.2  dyoung int	in_multientries;			/* total number of addrs */
    231  1.250.6.2  dyoung struct	in_multihashhead *in_multihashtbl;
    /* IP input queue; ipintr() dequeues from it and calls ip_input(). */
    232  1.250.6.2  dyoung struct	ifqueue ipintrq;
    /* IP statistics counters, bumped throughout ip_input(). */
    233  1.250.6.2  dyoung struct	ipstat	ipstat;
    /* Seeded from time_second in ip_init(). */
    234  1.250.6.2  dyoung uint16_t ip_id;
    235  1.250.6.2  dyoung 
    /* Packet filter hook head for AF_INET; registered in ip_init(). */
    236  1.250.6.2  dyoung #ifdef PFIL_HOOKS
    237  1.250.6.2  dyoung struct pfil_head inet_pfil_hook;
    238  1.250.6.2  dyoung #endif
    239  1.250.6.2  dyoung 
    240  1.250.6.2  dyoung /*
    241  1.250.6.2  dyoung  * Cached copy of nmbclusters. If nmbclusters is different,
    242  1.250.6.2  dyoung  * recalculate IP parameters derived from nmbclusters.
    243  1.250.6.2  dyoung  */
    244  1.250.6.2  dyoung static int	ip_nmbclusters;			/* copy of nmbclusters */
    245  1.250.6.2  dyoung static void	ip_nmbclusters_changed(void);	/* recalc limits */
    246  1.250.6.2  dyoung 
    /*
     * CHECK_NMBCLUSTER_PARAMS(): call ip_nmbclusters_changed() when the
     * cached copy no longer matches nmbclusters.  The mismatch is hinted
     * to the compiler as the unlikely case.
     */
    247  1.250.6.2  dyoung #define CHECK_NMBCLUSTER_PARAMS()				\
    248  1.250.6.2  dyoung do {								\
    249  1.250.6.2  dyoung 	if (__predict_false(ip_nmbclusters != nmbclusters))	\
    250  1.250.6.2  dyoung 		ip_nmbclusters_changed();			\
    251  1.250.6.2  dyoung } while (/*CONSTCOND*/0)
    252  1.250.6.2  dyoung 
    253  1.250.6.2  dyoung /* IP datagram reassembly queues (hashed) */
    254  1.250.6.2  dyoung #define IPREASS_NHASH_LOG2      6
    255  1.250.6.2  dyoung #define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
    256  1.250.6.2  dyoung #define IPREASS_HMASK           (IPREASS_NHASH - 1)
    /*
     * IPREASS_HASH(x, y): combine the low 4 bits of x with bits 8..11 of x
     * (shifted into bits 4..7), XOR the result with y, and mask it down to
     * a bucket index in [0, IPREASS_NHASH).
     */
    257  1.250.6.2  dyoung #define IPREASS_HASH(x,y) \
    258  1.250.6.2  dyoung 	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
    /* Hash buckets; each list head is initialized in ip_init(). */
    259  1.250.6.2  dyoung struct ipqhead ipq[IPREASS_NHASH];
    /* Lock flag: set by ipq_lock_try(), cleared by ipq_unlock(). */
    260  1.250.6.2  dyoung int	ipq_locked;
    261  1.250.6.2  dyoung static int	ip_nfragpackets;	/* packets in reass queue */
    262  1.250.6.2  dyoung static int	ip_nfrags;		/* total fragments in reass queues */
    263  1.250.6.2  dyoung 
    /* ip_maxfrags is set to nmbclusters / 4 by ip_nmbclusters_changed(). */
    264  1.250.6.2  dyoung int	ip_maxfragpackets = 200;	/* limit on packets. XXX sysctl */
    265  1.250.6.2  dyoung int	ip_maxfrags;		        /* limit on fragments. XXX sysctl */
    266  1.250.6.2  dyoung 
    267  1.250.6.2  dyoung 
    268  1.250.6.2  dyoung /*
    269  1.250.6.2  dyoung  * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for
    270  1.250.6.2  dyoung  * IP reassembly queue buffer management.
    271  1.250.6.2  dyoung  *
    272  1.250.6.2  dyoung  * We keep a count of total IP fragments (NB: not fragmented packets!)
    273  1.250.6.2  dyoung  * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
    274  1.250.6.2  dyoung  * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the
    275  1.250.6.2  dyoung  * total fragments in reassembly queues.  This AIMD policy avoids
    276  1.250.6.2  dyoung  * repeatedly deleting single packets under heavy fragmentation load
    277  1.250.6.2  dyoung  * (e.g., from lossy NFS peers).
    278  1.250.6.2  dyoung  */
    /* Forward declarations of file-local (static) reassembly helpers. */
    279  1.250.6.2  dyoung static u_int	ip_reass_ttl_decr(u_int ticks);
    280  1.250.6.2  dyoung static void	ip_reass_drophalf(void);
    281  1.250.6.2  dyoung 
    282  1.250.6.2  dyoung 
    /* Forward declarations of the ipq lock primitives defined just below. */
    283  1.250.6.2  dyoung static inline int ipq_lock_try(void);
    284  1.250.6.2  dyoung static inline void ipq_unlock(void);
    285  1.250.6.2  dyoung 
    286  1.250.6.2  dyoung static inline int
    287  1.250.6.2  dyoung ipq_lock_try(void)
    288  1.250.6.2  dyoung {
    289  1.250.6.2  dyoung 	int s;
    290  1.250.6.2  dyoung 
    291  1.250.6.2  dyoung 	/*
    292  1.250.6.2  dyoung 	 * Use splvm() -- we're blocking things that would cause
    293  1.250.6.2  dyoung 	 * mbuf allocation.
    294  1.250.6.2  dyoung 	 */
    295  1.250.6.2  dyoung 	s = splvm();
    296  1.250.6.2  dyoung 	if (ipq_locked) {
    297  1.250.6.2  dyoung 		splx(s);
    298  1.250.6.2  dyoung 		return (0);
    299  1.250.6.2  dyoung 	}
    300  1.250.6.2  dyoung 	ipq_locked = 1;
    301  1.250.6.2  dyoung 	splx(s);
    302  1.250.6.2  dyoung 	return (1);
    303  1.250.6.2  dyoung }
    304  1.250.6.2  dyoung 
    305  1.250.6.2  dyoung static inline void
    306  1.250.6.2  dyoung ipq_unlock(void)
    307  1.250.6.2  dyoung {
    308  1.250.6.2  dyoung 	int s;
    309  1.250.6.2  dyoung 
    310  1.250.6.2  dyoung 	s = splvm();
    311  1.250.6.2  dyoung 	ipq_locked = 0;
    312  1.250.6.2  dyoung 	splx(s);
    313  1.250.6.2  dyoung }
    314  1.250.6.2  dyoung 
    /*
     * IPQ_LOCK(): take the ipq lock through ipq_lock_try().  In DIAGNOSTIC
     * kernels a failed attempt prints the file/line and panics; otherwise
     * the result of ipq_lock_try() is ignored.
     *
     * IPQ_LOCK_CHECK(): in DIAGNOSTIC kernels, print the file/line and panic
     * if ipq_locked is clear; otherwise it expands to nothing.
     *
     * IPQ_UNLOCK(): release the lock through ipq_unlock().
     */
    315  1.250.6.2  dyoung #ifdef DIAGNOSTIC
    316  1.250.6.2  dyoung #define	IPQ_LOCK()							\
    317  1.250.6.2  dyoung do {									\
    318  1.250.6.2  dyoung 	if (ipq_lock_try() == 0) {					\
    319  1.250.6.2  dyoung 		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
    320  1.250.6.2  dyoung 		panic("ipq_lock");					\
    321  1.250.6.2  dyoung 	}								\
    322  1.250.6.2  dyoung } while (/*CONSTCOND*/ 0)
    323  1.250.6.2  dyoung #define	IPQ_LOCK_CHECK()						\
    324  1.250.6.2  dyoung do {									\
    325  1.250.6.2  dyoung 	if (ipq_locked == 0) {						\
    326  1.250.6.2  dyoung 		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
    327  1.250.6.2  dyoung 		panic("ipq lock check");				\
    328  1.250.6.2  dyoung 	}								\
    329  1.250.6.2  dyoung } while (/*CONSTCOND*/ 0)
    330  1.250.6.2  dyoung #else
    331  1.250.6.2  dyoung #define	IPQ_LOCK()		(void) ipq_lock_try()
    332  1.250.6.2  dyoung #define	IPQ_LOCK_CHECK()	/* nothing */
    333  1.250.6.2  dyoung #endif
    334  1.250.6.2  dyoung 
    335  1.250.6.2  dyoung #define	IPQ_UNLOCK()		ipq_unlock()
    336  1.250.6.2  dyoung 
    /*
     * Pools sized for struct in_multi and struct ipqent objects.  The two
     * are declared at different priority levels: IPL_SOFTNET and IPL_VM.
     */
    337  1.250.6.2  dyoung POOL_INIT(inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", NULL,
    338  1.250.6.2  dyoung     IPL_SOFTNET);
    339  1.250.6.2  dyoung POOL_INIT(ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", NULL,
    340  1.250.6.2  dyoung     IPL_VM);
    341  1.250.6.2  dyoung 
    /*
     * Optional event counters for IPv4 header checksum handling in
     * ip_input(): hardware-reported bad, hardware-reported ok, and
     * checksums computed in software.  When INET_CSUM_COUNTERS is not
     * defined, INET_CSUM_COUNTER_INCR() expands to nothing.
     */
    342  1.250.6.2  dyoung #ifdef INET_CSUM_COUNTERS
    343  1.250.6.2  dyoung #include <sys/device.h>
    344  1.250.6.2  dyoung 
    345  1.250.6.2  dyoung struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    346  1.250.6.2  dyoung     NULL, "inet", "hwcsum bad");
    347  1.250.6.2  dyoung struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    348  1.250.6.2  dyoung     NULL, "inet", "hwcsum ok");
    349  1.250.6.2  dyoung struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    350  1.250.6.2  dyoung     NULL, "inet", "swcsum");
    351  1.250.6.2  dyoung 
    352  1.250.6.2  dyoung #define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++
    353  1.250.6.2  dyoung 
    354  1.250.6.2  dyoung EVCNT_ATTACH_STATIC(ip_hwcsum_bad);
    355  1.250.6.2  dyoung EVCNT_ATTACH_STATIC(ip_hwcsum_ok);
    356  1.250.6.2  dyoung EVCNT_ATTACH_STATIC(ip_swcsum);
    357  1.250.6.2  dyoung 
    358  1.250.6.2  dyoung #else
    359  1.250.6.2  dyoung 
    360  1.250.6.2  dyoung #define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */
    361  1.250.6.2  dyoung 
    362  1.250.6.2  dyoung #endif /* INET_CSUM_COUNTERS */
    363  1.250.6.2  dyoung 
    364  1.250.6.2  dyoung /*
    365  1.250.6.2  dyoung  * We need to save the IP options in case a protocol wants to respond
    366  1.250.6.2  dyoung  * to an incoming packet over the same route if the packet got here
    367  1.250.6.2  dyoung  * using IP source routing.  This allows connection establishment and
    368  1.250.6.2  dyoung  * maintenance when the remote end is on a network that is not known
    369  1.250.6.2  dyoung  * to us.
    370  1.250.6.2  dyoung  */
    /* NOTE(review): presumably the number of valid entries in ip_srcrt.route; maintained outside this section -- confirm. */
    371  1.250.6.2  dyoung int	ip_nhops = 0;
    /*
     * Single file-local record holding a saved source route.  The route
     * array has room for MAX_IPOPTLEN bytes worth of addresses.
     */
    372  1.250.6.2  dyoung static	struct ip_srcrt {
    373  1.250.6.2  dyoung 	struct	in_addr dst;			/* final destination */
    374  1.250.6.2  dyoung 	char	nop;				/* one NOP to align */
    375  1.250.6.2  dyoung 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
    376  1.250.6.2  dyoung 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
    377  1.250.6.2  dyoung } ip_srcrt;
    378  1.250.6.2  dyoung 
    /* Forward declaration; the definition is not part of this section. */
    379  1.250.6.2  dyoung static void save_rte(u_char *, struct in_addr);
    380  1.250.6.2  dyoung 
    /*
     * Mbuf owner records, present only with MBUFTRACE.  Both are attached
     * in ip_init(); ip_rx_mowner is the one passed to MCLAIM() by ipintr()
     * and ip_input().
     */
    381  1.250.6.2  dyoung #ifdef MBUFTRACE
    382  1.250.6.2  dyoung struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx");
    383  1.250.6.2  dyoung struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx");
    384  1.250.6.2  dyoung #endif
    385  1.250.6.2  dyoung 
    386  1.250.6.2  dyoung /*
    387  1.250.6.2  dyoung  * Compute IP limits derived from the value of nmbclusters.
    388  1.250.6.2  dyoung  */
    389  1.250.6.2  dyoung static void
    390  1.250.6.2  dyoung ip_nmbclusters_changed(void)
    391  1.250.6.2  dyoung {
    392  1.250.6.2  dyoung 	ip_maxfrags = nmbclusters / 4;
    393  1.250.6.2  dyoung 	ip_nmbclusters =  nmbclusters;
    394  1.250.6.2  dyoung }
    395  1.250.6.2  dyoung 
    396  1.250.6.2  dyoung /*
    397  1.250.6.2  dyoung  * IP initialization: fill in IP protocol switch table.
    398  1.250.6.2  dyoung  * All protocols not implemented in kernel go to raw IP protocol handler.
    399  1.250.6.2  dyoung  */
    400  1.250.6.2  dyoung void
    401  1.250.6.2  dyoung ip_init(void)
    402  1.250.6.2  dyoung {
    403  1.250.6.2  dyoung 	const struct protosw *pr;
    404  1.250.6.2  dyoung 	int i;
    405  1.250.6.2  dyoung 
    406  1.250.6.2  dyoung 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
    407  1.250.6.2  dyoung 	if (pr == 0)
    408  1.250.6.2  dyoung 		panic("ip_init");
    409  1.250.6.2  dyoung 	for (i = 0; i < IPPROTO_MAX; i++)
    410  1.250.6.2  dyoung 		ip_protox[i] = pr - inetsw;
    411  1.250.6.2  dyoung 	for (pr = inetdomain.dom_protosw;
    412  1.250.6.2  dyoung 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
    413  1.250.6.2  dyoung 		if (pr->pr_domain->dom_family == PF_INET &&
    414  1.250.6.2  dyoung 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
    415  1.250.6.2  dyoung 			ip_protox[pr->pr_protocol] = pr - inetsw;
    416  1.250.6.2  dyoung 
    417  1.250.6.2  dyoung 	for (i = 0; i < IPREASS_NHASH; i++)
    418  1.250.6.2  dyoung 	    	LIST_INIT(&ipq[i]);
    419  1.250.6.2  dyoung 
    420  1.250.6.2  dyoung 	ip_id = time_second & 0xfffff;
    421  1.250.6.2  dyoung 
    422  1.250.6.2  dyoung 	ipintrq.ifq_maxlen = ipqmaxlen;
    423  1.250.6.2  dyoung 	ip_nmbclusters_changed();
    424  1.250.6.2  dyoung 
    425  1.250.6.2  dyoung 	TAILQ_INIT(&in_ifaddrhead);
    426  1.250.6.2  dyoung 	in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
    427  1.250.6.2  dyoung 	    M_WAITOK, &in_ifaddrhash);
    428  1.250.6.2  dyoung 	in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
    429  1.250.6.2  dyoung 	    M_WAITOK, &in_multihash);
    430  1.250.6.2  dyoung 	ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
    431  1.250.6.2  dyoung #ifdef GATEWAY
    432  1.250.6.2  dyoung 	ipflow_init(ip_hashsize);
    433  1.250.6.2  dyoung #endif
    434  1.250.6.2  dyoung 
    435  1.250.6.2  dyoung #ifdef PFIL_HOOKS
    436  1.250.6.2  dyoung 	/* Register our Packet Filter hook. */
    437  1.250.6.2  dyoung 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
    438  1.250.6.2  dyoung 	inet_pfil_hook.ph_af   = AF_INET;
    439  1.250.6.2  dyoung 	i = pfil_head_register(&inet_pfil_hook);
    440  1.250.6.2  dyoung 	if (i != 0)
    441  1.250.6.2  dyoung 		printf("ip_init: WARNING: unable to register pfil hook, "
    442  1.250.6.2  dyoung 		    "error %d\n", i);
    443  1.250.6.2  dyoung #endif /* PFIL_HOOKS */
    444  1.250.6.2  dyoung 
    445  1.250.6.2  dyoung #ifdef MBUFTRACE
    446  1.250.6.2  dyoung 	MOWNER_ATTACH(&ip_tx_mowner);
    447  1.250.6.2  dyoung 	MOWNER_ATTACH(&ip_rx_mowner);
    448  1.250.6.2  dyoung #endif /* MBUFTRACE */
    449  1.250.6.2  dyoung }
    450  1.250.6.2  dyoung 
    /*
     * AF_INET socket address with only its length and family preset; the
     * remaining members are zero-initialized.
     */
    451  1.250.6.2  dyoung struct	sockaddr_in ipaddr = {
    452  1.250.6.2  dyoung 	.sin_len = sizeof(ipaddr),
    453  1.250.6.2  dyoung 	.sin_family = AF_INET,
    454  1.250.6.2  dyoung };
    /* NOTE(review): presumably the cached route used when forwarding; not used in this section -- confirm. */
    455  1.250.6.2  dyoung struct	route ipforward_rt;
    456  1.250.6.2  dyoung 
    457  1.250.6.2  dyoung /*
    458  1.250.6.2  dyoung  * IP software interrupt routine
    459  1.250.6.2  dyoung  */
    460  1.250.6.2  dyoung void
    461  1.250.6.2  dyoung ipintr(void)
    462  1.250.6.2  dyoung {
    463  1.250.6.2  dyoung 	int s;
    464  1.250.6.2  dyoung 	struct mbuf *m;
    465  1.250.6.2  dyoung 
    466  1.250.6.2  dyoung 	while (!IF_IS_EMPTY(&ipintrq)) {
    467  1.250.6.2  dyoung 		s = splnet();
    468  1.250.6.2  dyoung 		IF_DEQUEUE(&ipintrq, m);
    469  1.250.6.2  dyoung 		splx(s);
    470  1.250.6.2  dyoung 		if (m == 0)
    471  1.250.6.2  dyoung 			return;
    472  1.250.6.2  dyoung 		MCLAIM(m, &ip_rx_mowner);
    473  1.250.6.2  dyoung 		ip_input(m);
    474  1.250.6.2  dyoung 	}
    475  1.250.6.2  dyoung }
    476  1.250.6.2  dyoung 
    477  1.250.6.2  dyoung /*
    478  1.250.6.2  dyoung  * Ip input routine.  Checksum and byte swap header.  If fragmented
    479  1.250.6.2  dyoung  * try to reassemble.  Process options.  Pass to next level.
    480  1.250.6.2  dyoung  */
    481  1.250.6.2  dyoung void
    482  1.250.6.2  dyoung ip_input(struct mbuf *m)
    483  1.250.6.2  dyoung {
    484  1.250.6.2  dyoung 	struct ip *ip = NULL;
    485  1.250.6.2  dyoung 	struct ipq *fp;
    486  1.250.6.2  dyoung 	struct in_ifaddr *ia;
    487  1.250.6.2  dyoung 	struct ifaddr *ifa;
    488  1.250.6.2  dyoung 	struct ipqent *ipqe;
    489  1.250.6.2  dyoung 	int hlen = 0, mff, len;
    490  1.250.6.2  dyoung 	int downmatch;
    491  1.250.6.2  dyoung 	int checkif;
    492  1.250.6.2  dyoung 	int srcrt = 0;
    493  1.250.6.2  dyoung 	int s;
    494  1.250.6.2  dyoung 	u_int hash;
    495  1.250.6.2  dyoung #ifdef FAST_IPSEC
    496  1.250.6.2  dyoung 	struct m_tag *mtag;
    497  1.250.6.2  dyoung 	struct tdb_ident *tdbi;
    498  1.250.6.2  dyoung 	struct secpolicy *sp;
    499  1.250.6.2  dyoung 	int error;
    500  1.250.6.2  dyoung #endif /* FAST_IPSEC */
    501  1.250.6.2  dyoung 
    502  1.250.6.2  dyoung 	MCLAIM(m, &ip_rx_mowner);
    503  1.250.6.2  dyoung #ifdef	DIAGNOSTIC
    504  1.250.6.2  dyoung 	if ((m->m_flags & M_PKTHDR) == 0)
    505  1.250.6.2  dyoung 		panic("ipintr no HDR");
    506  1.250.6.2  dyoung #endif
    507  1.250.6.2  dyoung 
    508  1.250.6.2  dyoung 	/*
    509  1.250.6.2  dyoung 	 * If no IP addresses have been set yet but the interfaces
    510  1.250.6.2  dyoung 	 * are receiving, can't do anything with incoming packets yet.
    511  1.250.6.2  dyoung 	 */
    512  1.250.6.2  dyoung 	if (TAILQ_FIRST(&in_ifaddrhead) == 0)
    513  1.250.6.2  dyoung 		goto bad;
    514  1.250.6.2  dyoung 	ipstat.ips_total++;
    515  1.250.6.2  dyoung 	/*
    516  1.250.6.2  dyoung 	 * If the IP header is not aligned, slurp it up into a new
    517  1.250.6.2  dyoung 	 * mbuf with space for link headers, in the event we forward
    518  1.250.6.2  dyoung 	 * it.  Otherwise, if it is aligned, make sure the entire
    519  1.250.6.2  dyoung 	 * base IP header is in the first mbuf of the chain.
    520  1.250.6.2  dyoung 	 */
    521  1.250.6.2  dyoung 	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
    522  1.250.6.2  dyoung 		if ((m = m_copyup(m, sizeof(struct ip),
    523  1.250.6.2  dyoung 				  (max_linkhdr + 3) & ~3)) == NULL) {
    524  1.250.6.2  dyoung 			/* XXXJRT new stat, please */
    525  1.250.6.2  dyoung 			ipstat.ips_toosmall++;
    526  1.250.6.2  dyoung 			return;
    527  1.250.6.2  dyoung 		}
    528  1.250.6.2  dyoung 	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
    529  1.250.6.2  dyoung 		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
    530  1.250.6.2  dyoung 			ipstat.ips_toosmall++;
    531  1.250.6.2  dyoung 			return;
    532  1.250.6.2  dyoung 		}
    533  1.250.6.2  dyoung 	}
    534  1.250.6.2  dyoung 	ip = mtod(m, struct ip *);
    535  1.250.6.2  dyoung 	if (ip->ip_v != IPVERSION) {
    536  1.250.6.2  dyoung 		ipstat.ips_badvers++;
    537  1.250.6.2  dyoung 		goto bad;
    538  1.250.6.2  dyoung 	}
    539  1.250.6.2  dyoung 	hlen = ip->ip_hl << 2;
    540  1.250.6.2  dyoung 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
    541  1.250.6.2  dyoung 		ipstat.ips_badhlen++;
    542  1.250.6.2  dyoung 		goto bad;
    543  1.250.6.2  dyoung 	}
    544  1.250.6.2  dyoung 	if (hlen > m->m_len) {
    545  1.250.6.2  dyoung 		if ((m = m_pullup(m, hlen)) == 0) {
    546  1.250.6.2  dyoung 			ipstat.ips_badhlen++;
    547  1.250.6.2  dyoung 			return;
    548  1.250.6.2  dyoung 		}
    549  1.250.6.2  dyoung 		ip = mtod(m, struct ip *);
    550  1.250.6.2  dyoung 	}
    551  1.250.6.2  dyoung 
    552  1.250.6.2  dyoung 	/*
    553  1.250.6.2  dyoung 	 * RFC1122: packets with a multicast source address are
    554  1.250.6.2  dyoung 	 * not allowed.
    555  1.250.6.2  dyoung 	 */
    556  1.250.6.2  dyoung 	if (IN_MULTICAST(ip->ip_src.s_addr)) {
    557  1.250.6.2  dyoung 		ipstat.ips_badaddr++;
    558  1.250.6.2  dyoung 		goto bad;
    559  1.250.6.2  dyoung 	}
    560  1.250.6.2  dyoung 
    561  1.250.6.2  dyoung 	/* 127/8 must not appear on wire - RFC1122 */
    562  1.250.6.2  dyoung 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
    563  1.250.6.2  dyoung 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
    564  1.250.6.2  dyoung 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
    565  1.250.6.2  dyoung 			ipstat.ips_badaddr++;
    566  1.250.6.2  dyoung 			goto bad;
    567  1.250.6.2  dyoung 		}
    568  1.250.6.2  dyoung 	}
    569  1.250.6.2  dyoung 
    570  1.250.6.2  dyoung 	switch (m->m_pkthdr.csum_flags &
    571  1.250.6.2  dyoung 		((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
    572  1.250.6.2  dyoung 		 M_CSUM_IPv4_BAD)) {
    573  1.250.6.2  dyoung 	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
    574  1.250.6.2  dyoung 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
    575  1.250.6.2  dyoung 		goto badcsum;
    576  1.250.6.2  dyoung 
    577  1.250.6.2  dyoung 	case M_CSUM_IPv4:
    578  1.250.6.2  dyoung 		/* Checksum was okay. */
    579  1.250.6.2  dyoung 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
    580  1.250.6.2  dyoung 		break;
    581  1.250.6.2  dyoung 
    582  1.250.6.2  dyoung 	default:
    583  1.250.6.2  dyoung 		/*
    584  1.250.6.2  dyoung 		 * Must compute it ourselves.  Maybe skip checksum on
    585  1.250.6.2  dyoung 		 * loopback interfaces.
    586  1.250.6.2  dyoung 		 */
    587  1.250.6.2  dyoung 		if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
    588  1.250.6.2  dyoung 				     IFF_LOOPBACK) || ip_do_loopback_cksum)) {
    589  1.250.6.2  dyoung 			INET_CSUM_COUNTER_INCR(&ip_swcsum);
    590  1.250.6.2  dyoung 			if (in_cksum(m, hlen) != 0)
    591  1.250.6.2  dyoung 				goto badcsum;
    592  1.250.6.2  dyoung 		}
    593  1.250.6.2  dyoung 		break;
    594  1.250.6.2  dyoung 	}
    595  1.250.6.2  dyoung 
    596  1.250.6.2  dyoung 	/* Retrieve the packet length. */
    597  1.250.6.2  dyoung 	len = ntohs(ip->ip_len);
    598  1.250.6.2  dyoung 
    599  1.250.6.2  dyoung 	/*
    600  1.250.6.2  dyoung 	 * Check for additional length bogosity
    601  1.250.6.2  dyoung 	 */
    602  1.250.6.2  dyoung 	if (len < hlen) {
    603  1.250.6.2  dyoung 	 	ipstat.ips_badlen++;
    604  1.250.6.2  dyoung 		goto bad;
    605  1.250.6.2  dyoung 	}
    606  1.250.6.2  dyoung 
    607  1.250.6.2  dyoung 	/*
    608  1.250.6.2  dyoung 	 * Check that the amount of data in the buffers
     609  1.250.6.2  dyoung 	 * is at least as much as the IP header would have us expect.
    610  1.250.6.2  dyoung 	 * Trim mbufs if longer than we expect.
    611  1.250.6.2  dyoung 	 * Drop packet if shorter than we expect.
    612  1.250.6.2  dyoung 	 */
    613  1.250.6.2  dyoung 	if (m->m_pkthdr.len < len) {
    614  1.250.6.2  dyoung 		ipstat.ips_tooshort++;
    615  1.250.6.2  dyoung 		goto bad;
    616  1.250.6.2  dyoung 	}
    617  1.250.6.2  dyoung 	if (m->m_pkthdr.len > len) {
    618  1.250.6.2  dyoung 		if (m->m_len == m->m_pkthdr.len) {
    619  1.250.6.2  dyoung 			m->m_len = len;
    620  1.250.6.2  dyoung 			m->m_pkthdr.len = len;
    621  1.250.6.2  dyoung 		} else
    622  1.250.6.2  dyoung 			m_adj(m, len - m->m_pkthdr.len);
    623  1.250.6.2  dyoung 	}
    624  1.250.6.2  dyoung 
    625  1.250.6.2  dyoung #if defined(IPSEC)
    626  1.250.6.2  dyoung 	/* ipflow (IP fast forwarding) is not compatible with IPsec. */
    627  1.250.6.2  dyoung 	m->m_flags &= ~M_CANFASTFWD;
    628  1.250.6.2  dyoung #else
    629  1.250.6.2  dyoung 	/*
    630  1.250.6.2  dyoung 	 * Assume that we can create a fast-forward IP flow entry
    631  1.250.6.2  dyoung 	 * based on this packet.
    632  1.250.6.2  dyoung 	 */
    633  1.250.6.2  dyoung 	m->m_flags |= M_CANFASTFWD;
    634  1.250.6.2  dyoung #endif
    635  1.250.6.2  dyoung 
    636  1.250.6.2  dyoung #ifdef PFIL_HOOKS
    637  1.250.6.2  dyoung 	/*
    638  1.250.6.2  dyoung 	 * Run through list of hooks for input packets.  If there are any
    639  1.250.6.2  dyoung 	 * filters which require that additional packets in the flow are
    640  1.250.6.2  dyoung 	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
    641  1.250.6.2  dyoung 	 * Note that filters must _never_ set this flag, as another filter
    642  1.250.6.2  dyoung 	 * in the list may have previously cleared it.
    643  1.250.6.2  dyoung 	 */
    644  1.250.6.2  dyoung 	/*
    645  1.250.6.2  dyoung 	 * let ipfilter look at packet on the wire,
    646  1.250.6.2  dyoung 	 * not the decapsulated packet.
    647  1.250.6.2  dyoung 	 */
    648  1.250.6.2  dyoung #ifdef IPSEC
    649  1.250.6.2  dyoung 	if (!ipsec_getnhist(m))
    650  1.250.6.2  dyoung #elif defined(FAST_IPSEC)
    651  1.250.6.2  dyoung 	if (!ipsec_indone(m))
    652  1.250.6.2  dyoung #else
    653  1.250.6.2  dyoung 	if (1)
    654  1.250.6.2  dyoung #endif
    655  1.250.6.2  dyoung 	{
    656  1.250.6.2  dyoung 		struct in_addr odst;
    657  1.250.6.2  dyoung 
    658  1.250.6.2  dyoung 		odst = ip->ip_dst;
    659  1.250.6.2  dyoung 		if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
    660  1.250.6.2  dyoung 		    PFIL_IN) != 0)
    661  1.250.6.2  dyoung 			return;
    662  1.250.6.2  dyoung 		if (m == NULL)
    663  1.250.6.2  dyoung 			return;
    664  1.250.6.2  dyoung 		ip = mtod(m, struct ip *);
    665  1.250.6.2  dyoung 		hlen = ip->ip_hl << 2;
    666  1.250.6.2  dyoung 		/*
    667  1.250.6.2  dyoung 		 * XXX The setting of "srcrt" here is to prevent ip_forward()
    668  1.250.6.2  dyoung 		 * from generating ICMP redirects for packets that have
    669  1.250.6.2  dyoung 		 * been redirected by a hook back out on to the same LAN that
    670  1.250.6.2  dyoung 		 * they came from and is not an indication that the packet
     671  1.250.6.2  dyoung 		 * is being influenced by source routing options.  This
    672  1.250.6.2  dyoung 		 * allows things like
    673  1.250.6.2  dyoung 		 * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp"
    674  1.250.6.2  dyoung 		 * where tlp0 is both on the 1.1.1.0/24 network and is the
    675  1.250.6.2  dyoung 		 * default route for hosts on 1.1.1.0/24.  Of course this
    676  1.250.6.2  dyoung 		 * also requires a "map tlp0 ..." to complete the story.
    677  1.250.6.2  dyoung 		 * One might argue whether or not this kind of network config.
    678  1.250.6.2  dyoung 		 * should be supported in this manner...
    679  1.250.6.2  dyoung 		 */
    680  1.250.6.2  dyoung 		srcrt = (odst.s_addr != ip->ip_dst.s_addr);
    681  1.250.6.2  dyoung 	}
    682  1.250.6.2  dyoung #endif /* PFIL_HOOKS */
    683  1.250.6.2  dyoung 
    684  1.250.6.2  dyoung #ifdef ALTQ
    685  1.250.6.2  dyoung 	/* XXX Temporary until ALTQ is changed to use a pfil hook */
    686  1.250.6.2  dyoung 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
    687  1.250.6.2  dyoung 		/* packet dropped by traffic conditioner */
    688  1.250.6.2  dyoung 		return;
    689  1.250.6.2  dyoung 	}
    690  1.250.6.2  dyoung #endif
    691  1.250.6.2  dyoung 
    692  1.250.6.2  dyoung 	/*
    693  1.250.6.2  dyoung 	 * Process options and, if not destined for us,
    694  1.250.6.2  dyoung 	 * ship it on.  ip_dooptions returns 1 when an
    695  1.250.6.2  dyoung 	 * error was detected (causing an icmp message
    696  1.250.6.2  dyoung 	 * to be sent and the original packet to be freed).
    697  1.250.6.2  dyoung 	 */
    698  1.250.6.2  dyoung 	ip_nhops = 0;		/* for source routed packets */
    699  1.250.6.2  dyoung 	if (hlen > sizeof (struct ip) && ip_dooptions(m))
    700  1.250.6.2  dyoung 		return;
    701  1.250.6.2  dyoung 
    702  1.250.6.2  dyoung 	/*
    703  1.250.6.2  dyoung 	 * Enable a consistency check between the destination address
    704  1.250.6.2  dyoung 	 * and the arrival interface for a unicast packet (the RFC 1122
    705  1.250.6.2  dyoung 	 * strong ES model) if IP forwarding is disabled and the packet
    706  1.250.6.2  dyoung 	 * is not locally generated.
    707  1.250.6.2  dyoung 	 *
    708  1.250.6.2  dyoung 	 * XXX - Checking also should be disabled if the destination
    709  1.250.6.2  dyoung 	 * address is ipnat'ed to a different interface.
    710  1.250.6.2  dyoung 	 *
    711  1.250.6.2  dyoung 	 * XXX - Checking is incompatible with IP aliases added
    712  1.250.6.2  dyoung 	 * to the loopback interface instead of the interface where
    713  1.250.6.2  dyoung 	 * the packets are received.
    714  1.250.6.2  dyoung 	 *
    715  1.250.6.2  dyoung 	 * XXX - We need to add a per ifaddr flag for this so that
    716  1.250.6.2  dyoung 	 * we get finer grain control.
    717  1.250.6.2  dyoung 	 */
    718  1.250.6.2  dyoung 	checkif = ip_checkinterface && (ipforwarding == 0) &&
    719  1.250.6.2  dyoung 	    (m->m_pkthdr.rcvif != NULL) &&
    720  1.250.6.2  dyoung 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0);
    721  1.250.6.2  dyoung 
    722  1.250.6.2  dyoung 	/*
    723  1.250.6.2  dyoung 	 * Check our list of addresses, to see if the packet is for us.
    724  1.250.6.2  dyoung 	 *
    725  1.250.6.2  dyoung 	 * Traditional 4.4BSD did not consult IFF_UP at all.
    726  1.250.6.2  dyoung 	 * The behavior here is to treat addresses on !IFF_UP interface
    727  1.250.6.2  dyoung 	 * as not mine.
    728  1.250.6.2  dyoung 	 */
    729  1.250.6.2  dyoung 	downmatch = 0;
    730  1.250.6.2  dyoung 	LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
    731  1.250.6.2  dyoung 		if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
    732  1.250.6.2  dyoung 			if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif)
    733  1.250.6.2  dyoung 				continue;
    734  1.250.6.2  dyoung 			if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
    735  1.250.6.2  dyoung 				break;
    736  1.250.6.2  dyoung 			else
    737  1.250.6.2  dyoung 				downmatch++;
    738  1.250.6.2  dyoung 		}
    739  1.250.6.2  dyoung 	}
    740  1.250.6.2  dyoung 	if (ia != NULL)
    741  1.250.6.2  dyoung 		goto ours;
    742  1.250.6.2  dyoung 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
    743  1.250.6.2  dyoung 		IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) {
    744  1.250.6.2  dyoung 			if (ifa->ifa_addr->sa_family != AF_INET)
    745  1.250.6.2  dyoung 				continue;
    746  1.250.6.2  dyoung 			ia = ifatoia(ifa);
    747  1.250.6.2  dyoung 			if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
    748  1.250.6.2  dyoung 			    in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
    749  1.250.6.2  dyoung 			    /*
    750  1.250.6.2  dyoung 			     * Look for all-0's host part (old broadcast addr),
    751  1.250.6.2  dyoung 			     * either for subnet or net.
    752  1.250.6.2  dyoung 			     */
    753  1.250.6.2  dyoung 			    ip->ip_dst.s_addr == ia->ia_subnet ||
    754  1.250.6.2  dyoung 			    ip->ip_dst.s_addr == ia->ia_net)
    755  1.250.6.2  dyoung 				goto ours;
    756  1.250.6.2  dyoung 			/*
    757  1.250.6.2  dyoung 			 * An interface with IP address zero accepts
    758  1.250.6.2  dyoung 			 * all packets that arrive on that interface.
    759  1.250.6.2  dyoung 			 */
    760  1.250.6.2  dyoung 			if (in_nullhost(ia->ia_addr.sin_addr))
    761  1.250.6.2  dyoung 				goto ours;
    762  1.250.6.2  dyoung 		}
    763  1.250.6.2  dyoung 	}
    764  1.250.6.2  dyoung 	if (IN_MULTICAST(ip->ip_dst.s_addr)) {
    765  1.250.6.2  dyoung 		struct in_multi *inm;
    766  1.250.6.2  dyoung #ifdef MROUTING
    767  1.250.6.2  dyoung 		extern struct socket *ip_mrouter;
    768  1.250.6.2  dyoung 
    769  1.250.6.2  dyoung 		if (ip_mrouter) {
    770  1.250.6.2  dyoung 			/*
    771  1.250.6.2  dyoung 			 * If we are acting as a multicast router, all
    772  1.250.6.2  dyoung 			 * incoming multicast packets are passed to the
    773  1.250.6.2  dyoung 			 * kernel-level multicast forwarding function.
    774  1.250.6.2  dyoung 			 * The packet is returned (relatively) intact; if
    775  1.250.6.2  dyoung 			 * ip_mforward() returns a non-zero value, the packet
    776  1.250.6.2  dyoung 			 * must be discarded, else it may be accepted below.
    777  1.250.6.2  dyoung 			 *
    778  1.250.6.2  dyoung 			 * (The IP ident field is put in the same byte order
    779  1.250.6.2  dyoung 			 * as expected when ip_mforward() is called from
    780  1.250.6.2  dyoung 			 * ip_output().)
    781  1.250.6.2  dyoung 			 */
    782  1.250.6.2  dyoung 			if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
    783  1.250.6.2  dyoung 				ipstat.ips_cantforward++;
    784  1.250.6.2  dyoung 				m_freem(m);
    785  1.250.6.2  dyoung 				return;
    786  1.250.6.2  dyoung 			}
    787  1.250.6.2  dyoung 
    788  1.250.6.2  dyoung 			/*
    789  1.250.6.2  dyoung 			 * The process-level routing demon needs to receive
    790  1.250.6.2  dyoung 			 * all multicast IGMP packets, whether or not this
    791  1.250.6.2  dyoung 			 * host belongs to their destination groups.
    792  1.250.6.2  dyoung 			 */
    793  1.250.6.2  dyoung 			if (ip->ip_p == IPPROTO_IGMP)
    794  1.250.6.2  dyoung 				goto ours;
    795  1.250.6.2  dyoung 			ipstat.ips_forward++;
    796  1.250.6.2  dyoung 		}
    797  1.250.6.2  dyoung #endif
    798  1.250.6.2  dyoung 		/*
    799  1.250.6.2  dyoung 		 * See if we belong to the destination multicast group on the
    800  1.250.6.2  dyoung 		 * arrival interface.
    801  1.250.6.2  dyoung 		 */
    802  1.250.6.2  dyoung 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
    803  1.250.6.2  dyoung 		if (inm == NULL) {
    804  1.250.6.2  dyoung 			ipstat.ips_cantforward++;
    805  1.250.6.2  dyoung 			m_freem(m);
    806  1.250.6.2  dyoung 			return;
    807  1.250.6.2  dyoung 		}
    808  1.250.6.2  dyoung 		goto ours;
    809  1.250.6.2  dyoung 	}
    810  1.250.6.2  dyoung 	if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    811  1.250.6.2  dyoung 	    in_nullhost(ip->ip_dst))
    812  1.250.6.2  dyoung 		goto ours;
    813  1.250.6.2  dyoung 
    814  1.250.6.2  dyoung 	/*
    815  1.250.6.2  dyoung 	 * Not for us; forward if possible and desirable.
    816  1.250.6.2  dyoung 	 */
    817  1.250.6.2  dyoung 	if (ipforwarding == 0) {
    818  1.250.6.2  dyoung 		ipstat.ips_cantforward++;
    819  1.250.6.2  dyoung 		m_freem(m);
    820  1.250.6.2  dyoung 	} else {
    821  1.250.6.2  dyoung 		/*
    822  1.250.6.2  dyoung 		 * If ip_dst matched any of my address on !IFF_UP interface,
    823  1.250.6.2  dyoung 		 * and there's no IFF_UP interface that matches ip_dst,
    824  1.250.6.2  dyoung 		 * send icmp unreach.  Forwarding it will result in in-kernel
    825  1.250.6.2  dyoung 		 * forwarding loop till TTL goes to 0.
    826  1.250.6.2  dyoung 		 */
    827  1.250.6.2  dyoung 		if (downmatch) {
    828  1.250.6.2  dyoung 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
    829  1.250.6.2  dyoung 			ipstat.ips_cantforward++;
    830  1.250.6.2  dyoung 			return;
    831  1.250.6.2  dyoung 		}
    832  1.250.6.2  dyoung #ifdef IPSEC
    833  1.250.6.2  dyoung 		if (ipsec4_in_reject(m, NULL)) {
    834  1.250.6.2  dyoung 			ipsecstat.in_polvio++;
    835  1.250.6.2  dyoung 			goto bad;
    836  1.250.6.2  dyoung 		}
    837  1.250.6.2  dyoung #endif
    838  1.250.6.2  dyoung #ifdef FAST_IPSEC
    839  1.250.6.2  dyoung 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
    840  1.250.6.2  dyoung 		s = splsoftnet();
    841  1.250.6.2  dyoung 		if (mtag != NULL) {
    842  1.250.6.2  dyoung 			tdbi = (struct tdb_ident *)(mtag + 1);
    843  1.250.6.2  dyoung 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
    844  1.250.6.2  dyoung 		} else {
    845  1.250.6.2  dyoung 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
    846  1.250.6.2  dyoung 						   IP_FORWARDING, &error);
    847  1.250.6.2  dyoung 		}
    848  1.250.6.2  dyoung 		if (sp == NULL) {	/* NB: can happen if error */
    849  1.250.6.2  dyoung 			splx(s);
    850  1.250.6.2  dyoung 			/*XXX error stat???*/
    851  1.250.6.2  dyoung 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
    852  1.250.6.2  dyoung 			goto bad;
    853  1.250.6.2  dyoung 		}
    854  1.250.6.2  dyoung 
    855  1.250.6.2  dyoung 		/*
    856  1.250.6.2  dyoung 		 * Check security policy against packet attributes.
    857  1.250.6.2  dyoung 		 */
    858  1.250.6.2  dyoung 		error = ipsec_in_reject(sp, m);
    859  1.250.6.2  dyoung 		KEY_FREESP(&sp);
    860  1.250.6.2  dyoung 		splx(s);
    861  1.250.6.2  dyoung 		if (error) {
    862  1.250.6.2  dyoung 			ipstat.ips_cantforward++;
    863  1.250.6.2  dyoung 			goto bad;
    864  1.250.6.2  dyoung 		}
    865  1.250.6.2  dyoung 
    866  1.250.6.2  dyoung 		/*
    867  1.250.6.2  dyoung 		 * Peek at the outbound SP for this packet to determine if
    868  1.250.6.2  dyoung 		 * it's a Fast Forward candidate.
    869  1.250.6.2  dyoung 		 */
    870  1.250.6.2  dyoung 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
    871  1.250.6.2  dyoung 		if (mtag != NULL)
    872  1.250.6.2  dyoung 			m->m_flags &= ~M_CANFASTFWD;
    873  1.250.6.2  dyoung 		else {
    874  1.250.6.2  dyoung 			s = splsoftnet();
    875  1.250.6.2  dyoung 			sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND,
    876  1.250.6.2  dyoung 			    (IP_FORWARDING |
    877  1.250.6.2  dyoung 			     (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
    878  1.250.6.2  dyoung 			    &error, NULL);
    879  1.250.6.2  dyoung 			if (sp != NULL) {
    880  1.250.6.2  dyoung 				m->m_flags &= ~M_CANFASTFWD;
    881  1.250.6.2  dyoung 				KEY_FREESP(&sp);
    882  1.250.6.2  dyoung 			}
    883  1.250.6.2  dyoung 			splx(s);
    884  1.250.6.2  dyoung 		}
    885  1.250.6.2  dyoung #endif	/* FAST_IPSEC */
    886  1.250.6.2  dyoung 
    887  1.250.6.2  dyoung 		ip_forward(m, srcrt);
    888  1.250.6.2  dyoung 	}
    889  1.250.6.2  dyoung 	return;
    890  1.250.6.2  dyoung 
    891  1.250.6.2  dyoung ours:
    892  1.250.6.2  dyoung 	/*
    893  1.250.6.2  dyoung 	 * If offset or IP_MF are set, must reassemble.
    894  1.250.6.2  dyoung 	 * Otherwise, nothing need be done.
    895  1.250.6.2  dyoung 	 * (We could look in the reassembly queue to see
    896  1.250.6.2  dyoung 	 * if the packet was previously fragmented,
    897  1.250.6.2  dyoung 	 * but it's not worth the time; just let them time out.)
    898  1.250.6.2  dyoung 	 */
    899  1.250.6.2  dyoung 	if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
    900  1.250.6.2  dyoung 
    901  1.250.6.2  dyoung 		/*
    902  1.250.6.2  dyoung 		 * Look for queue of fragments
    903  1.250.6.2  dyoung 		 * of this datagram.
    904  1.250.6.2  dyoung 		 */
    905  1.250.6.2  dyoung 		IPQ_LOCK();
    906  1.250.6.2  dyoung 		hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
    907  1.250.6.2  dyoung 		LIST_FOREACH(fp, &ipq[hash], ipq_q) {
    908  1.250.6.2  dyoung 			if (ip->ip_id == fp->ipq_id &&
    909  1.250.6.2  dyoung 			    in_hosteq(ip->ip_src, fp->ipq_src) &&
    910  1.250.6.2  dyoung 			    in_hosteq(ip->ip_dst, fp->ipq_dst) &&
    911  1.250.6.2  dyoung 			    ip->ip_p == fp->ipq_p)
    912  1.250.6.2  dyoung 				goto found;
    913  1.250.6.2  dyoung 
    914  1.250.6.2  dyoung 		}
    915  1.250.6.2  dyoung 		fp = 0;
    916  1.250.6.2  dyoung found:
    917  1.250.6.2  dyoung 
    918  1.250.6.2  dyoung 		/*
    919  1.250.6.2  dyoung 		 * Adjust ip_len to not reflect header,
    920  1.250.6.2  dyoung 		 * set ipqe_mff if more fragments are expected,
    921  1.250.6.2  dyoung 		 * convert offset of this to bytes.
    922  1.250.6.2  dyoung 		 */
    923  1.250.6.2  dyoung 		ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
    924  1.250.6.2  dyoung 		mff = (ip->ip_off & htons(IP_MF)) != 0;
    925  1.250.6.2  dyoung 		if (mff) {
    926  1.250.6.2  dyoung 		        /*
    927  1.250.6.2  dyoung 		         * Make sure that fragments have a data length
    928  1.250.6.2  dyoung 			 * that's a non-zero multiple of 8 bytes.
    929  1.250.6.2  dyoung 		         */
    930  1.250.6.2  dyoung 			if (ntohs(ip->ip_len) == 0 ||
    931  1.250.6.2  dyoung 			    (ntohs(ip->ip_len) & 0x7) != 0) {
    932  1.250.6.2  dyoung 				ipstat.ips_badfrags++;
    933  1.250.6.2  dyoung 				IPQ_UNLOCK();
    934  1.250.6.2  dyoung 				goto bad;
    935  1.250.6.2  dyoung 			}
    936  1.250.6.2  dyoung 		}
    937  1.250.6.2  dyoung 		ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
    938  1.250.6.2  dyoung 
    939  1.250.6.2  dyoung 		/*
    940  1.250.6.2  dyoung 		 * If datagram marked as having more fragments
    941  1.250.6.2  dyoung 		 * or if this is not the first fragment,
    942  1.250.6.2  dyoung 		 * attempt reassembly; if it succeeds, proceed.
    943  1.250.6.2  dyoung 		 */
    944  1.250.6.2  dyoung 		if (mff || ip->ip_off != htons(0)) {
    945  1.250.6.2  dyoung 			ipstat.ips_fragments++;
    946  1.250.6.2  dyoung 			s = splvm();
    947  1.250.6.2  dyoung 			ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
    948  1.250.6.2  dyoung 			splx(s);
    949  1.250.6.2  dyoung 			if (ipqe == NULL) {
    950  1.250.6.2  dyoung 				ipstat.ips_rcvmemdrop++;
    951  1.250.6.2  dyoung 				IPQ_UNLOCK();
    952  1.250.6.2  dyoung 				goto bad;
    953  1.250.6.2  dyoung 			}
    954  1.250.6.2  dyoung 			ipqe->ipqe_mff = mff;
    955  1.250.6.2  dyoung 			ipqe->ipqe_m = m;
    956  1.250.6.2  dyoung 			ipqe->ipqe_ip = ip;
    957  1.250.6.2  dyoung 			m = ip_reass(ipqe, fp, &ipq[hash]);
    958  1.250.6.2  dyoung 			if (m == 0) {
    959  1.250.6.2  dyoung 				IPQ_UNLOCK();
    960  1.250.6.2  dyoung 				return;
    961  1.250.6.2  dyoung 			}
    962  1.250.6.2  dyoung 			ipstat.ips_reassembled++;
    963  1.250.6.2  dyoung 			ip = mtod(m, struct ip *);
    964  1.250.6.2  dyoung 			hlen = ip->ip_hl << 2;
    965  1.250.6.2  dyoung 			ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
    966  1.250.6.2  dyoung 		} else
    967  1.250.6.2  dyoung 			if (fp)
    968  1.250.6.2  dyoung 				ip_freef(fp);
    969  1.250.6.2  dyoung 		IPQ_UNLOCK();
    970  1.250.6.2  dyoung 	}
    971  1.250.6.2  dyoung 
    972  1.250.6.2  dyoung #if defined(IPSEC)
    973  1.250.6.2  dyoung 	/*
    974  1.250.6.2  dyoung 	 * enforce IPsec policy checking if we are seeing last header.
    975  1.250.6.2  dyoung 	 * note that we do not visit this with protocols with pcb layer
    976  1.250.6.2  dyoung 	 * code - like udp/tcp/raw ip.
    977  1.250.6.2  dyoung 	 */
    978  1.250.6.2  dyoung 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
    979  1.250.6.2  dyoung 	    ipsec4_in_reject(m, NULL)) {
    980  1.250.6.2  dyoung 		ipsecstat.in_polvio++;
    981  1.250.6.2  dyoung 		goto bad;
    982  1.250.6.2  dyoung 	}
    983  1.250.6.2  dyoung #endif
    984  1.250.6.2  dyoung #ifdef FAST_IPSEC
    985  1.250.6.2  dyoung 	/*
    986  1.250.6.2  dyoung 	 * enforce IPsec policy checking if we are seeing last header.
    987  1.250.6.2  dyoung 	 * note that we do not visit this with protocols with pcb layer
    988  1.250.6.2  dyoung 	 * code - like udp/tcp/raw ip.
    989  1.250.6.2  dyoung 	 */
    990  1.250.6.2  dyoung 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
    991  1.250.6.2  dyoung 		/*
    992  1.250.6.2  dyoung 		 * Check if the packet has already had IPsec processing
    993  1.250.6.2  dyoung 		 * done.  If so, then just pass it along.  This tag gets
    994  1.250.6.2  dyoung 		 * set during AH, ESP, etc. input handling, before the
    995  1.250.6.2  dyoung 		 * packet is returned to the ip input queue for delivery.
    996  1.250.6.2  dyoung 		 */
    997  1.250.6.2  dyoung 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
    998  1.250.6.2  dyoung 		s = splsoftnet();
    999  1.250.6.2  dyoung 		if (mtag != NULL) {
   1000  1.250.6.2  dyoung 			tdbi = (struct tdb_ident *)(mtag + 1);
   1001  1.250.6.2  dyoung 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
   1002  1.250.6.2  dyoung 		} else {
   1003  1.250.6.2  dyoung 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
   1004  1.250.6.2  dyoung 						   IP_FORWARDING, &error);
   1005  1.250.6.2  dyoung 		}
   1006  1.250.6.2  dyoung 		if (sp != NULL) {
   1007  1.250.6.2  dyoung 			/*
   1008  1.250.6.2  dyoung 			 * Check security policy against packet attributes.
   1009  1.250.6.2  dyoung 			 */
   1010  1.250.6.2  dyoung 			error = ipsec_in_reject(sp, m);
   1011  1.250.6.2  dyoung 			KEY_FREESP(&sp);
   1012  1.250.6.2  dyoung 		} else {
   1013  1.250.6.2  dyoung 			/* XXX error stat??? */
   1014  1.250.6.2  dyoung 			error = EINVAL;
   1015  1.250.6.2  dyoung DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
   1016  1.250.6.2  dyoung 			goto bad;
   1017  1.250.6.2  dyoung 		}
   1018  1.250.6.2  dyoung 		splx(s);
   1019  1.250.6.2  dyoung 		if (error)
   1020  1.250.6.2  dyoung 			goto bad;
   1021  1.250.6.2  dyoung 	}
   1022  1.250.6.2  dyoung #endif /* FAST_IPSEC */
   1023  1.250.6.2  dyoung 
   1024  1.250.6.2  dyoung 	/*
   1025  1.250.6.2  dyoung 	 * Switch out to protocol's input routine.
   1026  1.250.6.2  dyoung 	 */
   1027  1.250.6.2  dyoung #if IFA_STATS
   1028  1.250.6.2  dyoung 	if (ia && ip)
   1029  1.250.6.2  dyoung 		ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
   1030  1.250.6.2  dyoung #endif
   1031  1.250.6.2  dyoung 	ipstat.ips_delivered++;
   1032  1.250.6.2  dyoung     {
   1033  1.250.6.2  dyoung 	int off = hlen, nh = ip->ip_p;
   1034  1.250.6.2  dyoung 
   1035  1.250.6.2  dyoung 	(*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
   1036  1.250.6.2  dyoung 	return;
   1037  1.250.6.2  dyoung     }
   1038  1.250.6.2  dyoung bad:
   1039  1.250.6.2  dyoung 	m_freem(m);
   1040  1.250.6.2  dyoung 	return;
   1041  1.250.6.2  dyoung 
   1042  1.250.6.2  dyoung badcsum:
   1043  1.250.6.2  dyoung 	ipstat.ips_badsum++;
   1044  1.250.6.2  dyoung 	m_freem(m);
   1045  1.250.6.2  dyoung }
   1046  1.250.6.2  dyoung 
   1047  1.250.6.2  dyoung /*
   1048  1.250.6.2  dyoung  * Take incoming datagram fragment and try to
   1049  1.250.6.2  dyoung  * reassemble it into whole datagram.  If a chain for
   1050  1.250.6.2  dyoung  * reassembly of this datagram already exists, then it
    1051  1.250.6.2  dyoung  * is given as fp; otherwise a new chain must be created.
   1052  1.250.6.2  dyoung  */
   1053  1.250.6.2  dyoung struct mbuf *
   1054  1.250.6.2  dyoung ip_reass(struct ipqent *ipqe, struct ipq *fp, struct ipqhead *ipqhead)
   1055  1.250.6.2  dyoung {
   1056  1.250.6.2  dyoung 	struct mbuf *m = ipqe->ipqe_m;
   1057  1.250.6.2  dyoung 	struct ipqent *nq, *p, *q;
   1058  1.250.6.2  dyoung 	struct ip *ip;
   1059  1.250.6.2  dyoung 	struct mbuf *t;
   1060  1.250.6.2  dyoung 	int hlen = ipqe->ipqe_ip->ip_hl << 2;
   1061  1.250.6.2  dyoung 	int i, next, s;
   1062  1.250.6.2  dyoung 
   1063  1.250.6.2  dyoung 	IPQ_LOCK_CHECK();
   1064  1.250.6.2  dyoung 
   1065  1.250.6.2  dyoung 	/*
   1066  1.250.6.2  dyoung 	 * Presence of header sizes in mbufs
   1067  1.250.6.2  dyoung 	 * would confuse code below.
   1068  1.250.6.2  dyoung 	 */
   1069  1.250.6.2  dyoung 	m->m_data += hlen;
   1070  1.250.6.2  dyoung 	m->m_len -= hlen;
   1071  1.250.6.2  dyoung 
   1072  1.250.6.2  dyoung #ifdef	notyet
   1073  1.250.6.2  dyoung 	/* make sure fragment limit is up-to-date */
   1074  1.250.6.2  dyoung 	CHECK_NMBCLUSTER_PARAMS();
   1075  1.250.6.2  dyoung 
   1076  1.250.6.2  dyoung 	/* If we have too many fragments, drop the older half. */
   1077  1.250.6.2  dyoung 	if (ip_nfrags >= ip_maxfrags)
   1078  1.250.6.2  dyoung 		ip_reass_drophalf(void);
   1079  1.250.6.2  dyoung #endif
   1080  1.250.6.2  dyoung 
   1081  1.250.6.2  dyoung 	/*
   1082  1.250.6.2  dyoung 	 * We are about to add a fragment; increment frag count.
   1083  1.250.6.2  dyoung 	 */
   1084  1.250.6.2  dyoung 	ip_nfrags++;
   1085  1.250.6.2  dyoung 
   1086  1.250.6.2  dyoung 	/*
   1087  1.250.6.2  dyoung 	 * If first fragment to arrive, create a reassembly queue.
   1088  1.250.6.2  dyoung 	 */
   1089  1.250.6.2  dyoung 	if (fp == 0) {
   1090  1.250.6.2  dyoung 		/*
   1091  1.250.6.2  dyoung 		 * Enforce upper bound on number of fragmented packets
   1092  1.250.6.2  dyoung 		 * for which we attempt reassembly;
   1093  1.250.6.2  dyoung 		 * If maxfrag is 0, never accept fragments.
   1094  1.250.6.2  dyoung 		 * If maxfrag is -1, accept all fragments without limitation.
   1095  1.250.6.2  dyoung 		 */
   1096  1.250.6.2  dyoung 		if (ip_maxfragpackets < 0)
   1097  1.250.6.2  dyoung 			;
   1098  1.250.6.2  dyoung 		else if (ip_nfragpackets >= ip_maxfragpackets)
   1099  1.250.6.2  dyoung 			goto dropfrag;
   1100  1.250.6.2  dyoung 		ip_nfragpackets++;
   1101  1.250.6.2  dyoung 		MALLOC(fp, struct ipq *, sizeof (struct ipq),
   1102  1.250.6.2  dyoung 		    M_FTABLE, M_NOWAIT);
   1103  1.250.6.2  dyoung 		if (fp == NULL)
   1104  1.250.6.2  dyoung 			goto dropfrag;
   1105  1.250.6.2  dyoung 		LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
   1106  1.250.6.2  dyoung 		fp->ipq_nfrags = 1;
   1107  1.250.6.2  dyoung 		fp->ipq_ttl = IPFRAGTTL;
   1108  1.250.6.2  dyoung 		fp->ipq_p = ipqe->ipqe_ip->ip_p;
   1109  1.250.6.2  dyoung 		fp->ipq_id = ipqe->ipqe_ip->ip_id;
   1110  1.250.6.2  dyoung 		TAILQ_INIT(&fp->ipq_fragq);
   1111  1.250.6.2  dyoung 		fp->ipq_src = ipqe->ipqe_ip->ip_src;
   1112  1.250.6.2  dyoung 		fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
   1113  1.250.6.2  dyoung 		p = NULL;
   1114  1.250.6.2  dyoung 		goto insert;
   1115  1.250.6.2  dyoung 	} else {
   1116  1.250.6.2  dyoung 		fp->ipq_nfrags++;
   1117  1.250.6.2  dyoung 	}
   1118  1.250.6.2  dyoung 
   1119  1.250.6.2  dyoung 	/*
   1120  1.250.6.2  dyoung 	 * Find a segment which begins after this one does.
   1121  1.250.6.2  dyoung 	 */
   1122  1.250.6.2  dyoung 	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
   1123  1.250.6.2  dyoung 	    p = q, q = TAILQ_NEXT(q, ipqe_q))
   1124  1.250.6.2  dyoung 		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
   1125  1.250.6.2  dyoung 			break;
   1126  1.250.6.2  dyoung 
   1127  1.250.6.2  dyoung 	/*
   1128  1.250.6.2  dyoung 	 * If there is a preceding segment, it may provide some of
   1129  1.250.6.2  dyoung 	 * our data already.  If so, drop the data from the incoming
   1130  1.250.6.2  dyoung 	 * segment.  If it provides all of our data, drop us.
   1131  1.250.6.2  dyoung 	 */
   1132  1.250.6.2  dyoung 	if (p != NULL) {
   1133  1.250.6.2  dyoung 		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
   1134  1.250.6.2  dyoung 		    ntohs(ipqe->ipqe_ip->ip_off);
   1135  1.250.6.2  dyoung 		if (i > 0) {
   1136  1.250.6.2  dyoung 			if (i >= ntohs(ipqe->ipqe_ip->ip_len))
   1137  1.250.6.2  dyoung 				goto dropfrag;
   1138  1.250.6.2  dyoung 			m_adj(ipqe->ipqe_m, i);
   1139  1.250.6.2  dyoung 			ipqe->ipqe_ip->ip_off =
   1140  1.250.6.2  dyoung 			    htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
   1141  1.250.6.2  dyoung 			ipqe->ipqe_ip->ip_len =
   1142  1.250.6.2  dyoung 			    htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
   1143  1.250.6.2  dyoung 		}
   1144  1.250.6.2  dyoung 	}
   1145  1.250.6.2  dyoung 
   1146  1.250.6.2  dyoung 	/*
   1147  1.250.6.2  dyoung 	 * While we overlap succeeding segments trim them or,
   1148  1.250.6.2  dyoung 	 * if they are completely covered, dequeue them.
   1149  1.250.6.2  dyoung 	 */
   1150  1.250.6.2  dyoung 	for (; q != NULL &&
   1151  1.250.6.2  dyoung 	    ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
   1152  1.250.6.2  dyoung 	    ntohs(q->ipqe_ip->ip_off); q = nq) {
   1153  1.250.6.2  dyoung 		i = (ntohs(ipqe->ipqe_ip->ip_off) +
   1154  1.250.6.2  dyoung 		    ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
   1155  1.250.6.2  dyoung 		if (i < ntohs(q->ipqe_ip->ip_len)) {
   1156  1.250.6.2  dyoung 			q->ipqe_ip->ip_len =
   1157  1.250.6.2  dyoung 			    htons(ntohs(q->ipqe_ip->ip_len) - i);
   1158  1.250.6.2  dyoung 			q->ipqe_ip->ip_off =
   1159  1.250.6.2  dyoung 			    htons(ntohs(q->ipqe_ip->ip_off) + i);
   1160  1.250.6.2  dyoung 			m_adj(q->ipqe_m, i);
   1161  1.250.6.2  dyoung 			break;
   1162  1.250.6.2  dyoung 		}
   1163  1.250.6.2  dyoung 		nq = TAILQ_NEXT(q, ipqe_q);
   1164  1.250.6.2  dyoung 		m_freem(q->ipqe_m);
   1165  1.250.6.2  dyoung 		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
   1166  1.250.6.2  dyoung 		s = splvm();
   1167  1.250.6.2  dyoung 		pool_put(&ipqent_pool, q);
   1168  1.250.6.2  dyoung 		splx(s);
   1169  1.250.6.2  dyoung 		fp->ipq_nfrags--;
   1170  1.250.6.2  dyoung 		ip_nfrags--;
   1171  1.250.6.2  dyoung 	}
   1172  1.250.6.2  dyoung 
   1173  1.250.6.2  dyoung insert:
   1174  1.250.6.2  dyoung 	/*
   1175  1.250.6.2  dyoung 	 * Stick new segment in its place;
   1176  1.250.6.2  dyoung 	 * check for complete reassembly.
   1177  1.250.6.2  dyoung 	 */
   1178  1.250.6.2  dyoung 	if (p == NULL) {
   1179  1.250.6.2  dyoung 		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
   1180  1.250.6.2  dyoung 	} else {
   1181  1.250.6.2  dyoung 		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
   1182  1.250.6.2  dyoung 	}
   1183  1.250.6.2  dyoung 	next = 0;
   1184  1.250.6.2  dyoung 	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
   1185  1.250.6.2  dyoung 	    p = q, q = TAILQ_NEXT(q, ipqe_q)) {
   1186  1.250.6.2  dyoung 		if (ntohs(q->ipqe_ip->ip_off) != next)
   1187  1.250.6.2  dyoung 			return (0);
   1188  1.250.6.2  dyoung 		next += ntohs(q->ipqe_ip->ip_len);
   1189  1.250.6.2  dyoung 	}
   1190  1.250.6.2  dyoung 	if (p->ipqe_mff)
   1191  1.250.6.2  dyoung 		return (0);
   1192  1.250.6.2  dyoung 
   1193  1.250.6.2  dyoung 	/*
   1194  1.250.6.2  dyoung 	 * Reassembly is complete.  Check for a bogus message size and
   1195  1.250.6.2  dyoung 	 * concatenate fragments.
   1196  1.250.6.2  dyoung 	 */
   1197  1.250.6.2  dyoung 	q = TAILQ_FIRST(&fp->ipq_fragq);
   1198  1.250.6.2  dyoung 	ip = q->ipqe_ip;
   1199  1.250.6.2  dyoung 	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
   1200  1.250.6.2  dyoung 		ipstat.ips_toolong++;
   1201  1.250.6.2  dyoung 		ip_freef(fp);
   1202  1.250.6.2  dyoung 		return (0);
   1203  1.250.6.2  dyoung 	}
   1204  1.250.6.2  dyoung 	m = q->ipqe_m;
   1205  1.250.6.2  dyoung 	t = m->m_next;
   1206  1.250.6.2  dyoung 	m->m_next = 0;
   1207  1.250.6.2  dyoung 	m_cat(m, t);
   1208  1.250.6.2  dyoung 	nq = TAILQ_NEXT(q, ipqe_q);
   1209  1.250.6.2  dyoung 	s = splvm();
   1210  1.250.6.2  dyoung 	pool_put(&ipqent_pool, q);
   1211  1.250.6.2  dyoung 	splx(s);
   1212  1.250.6.2  dyoung 	for (q = nq; q != NULL; q = nq) {
   1213  1.250.6.2  dyoung 		t = q->ipqe_m;
   1214  1.250.6.2  dyoung 		nq = TAILQ_NEXT(q, ipqe_q);
   1215  1.250.6.2  dyoung 		s = splvm();
   1216  1.250.6.2  dyoung 		pool_put(&ipqent_pool, q);
   1217  1.250.6.2  dyoung 		splx(s);
   1218  1.250.6.2  dyoung 		m_cat(m, t);
   1219  1.250.6.2  dyoung 	}
   1220  1.250.6.2  dyoung 	ip_nfrags -= fp->ipq_nfrags;
   1221  1.250.6.2  dyoung 
   1222  1.250.6.2  dyoung 	/*
   1223  1.250.6.2  dyoung 	 * Create header for new ip packet by
   1224  1.250.6.2  dyoung 	 * modifying header of first packet;
   1225  1.250.6.2  dyoung 	 * dequeue and discard fragment reassembly header.
   1226  1.250.6.2  dyoung 	 * Make header visible.
   1227  1.250.6.2  dyoung 	 */
   1228  1.250.6.2  dyoung 	ip->ip_len = htons(next);
   1229  1.250.6.2  dyoung 	ip->ip_src = fp->ipq_src;
   1230  1.250.6.2  dyoung 	ip->ip_dst = fp->ipq_dst;
   1231  1.250.6.2  dyoung 	LIST_REMOVE(fp, ipq_q);
   1232  1.250.6.2  dyoung 	FREE(fp, M_FTABLE);
   1233  1.250.6.2  dyoung 	ip_nfragpackets--;
   1234  1.250.6.2  dyoung 	m->m_len += (ip->ip_hl << 2);
   1235  1.250.6.2  dyoung 	m->m_data -= (ip->ip_hl << 2);
   1236  1.250.6.2  dyoung 	/* some debugging cruft by sklower, below, will go away soon */
   1237  1.250.6.2  dyoung 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
   1238  1.250.6.2  dyoung 		int plen = 0;
   1239  1.250.6.2  dyoung 		for (t = m; t; t = t->m_next)
   1240  1.250.6.2  dyoung 			plen += t->m_len;
   1241  1.250.6.2  dyoung 		m->m_pkthdr.len = plen;
   1242  1.250.6.2  dyoung 		m->m_pkthdr.csum_flags = 0;
   1243  1.250.6.2  dyoung 	}
   1244  1.250.6.2  dyoung 	return (m);
   1245  1.250.6.2  dyoung 
   1246  1.250.6.2  dyoung dropfrag:
   1247  1.250.6.2  dyoung 	if (fp != 0)
   1248  1.250.6.2  dyoung 		fp->ipq_nfrags--;
   1249  1.250.6.2  dyoung 	ip_nfrags--;
   1250  1.250.6.2  dyoung 	ipstat.ips_fragdropped++;
   1251  1.250.6.2  dyoung 	m_freem(m);
   1252  1.250.6.2  dyoung 	s = splvm();
   1253  1.250.6.2  dyoung 	pool_put(&ipqent_pool, ipqe);
   1254  1.250.6.2  dyoung 	splx(s);
   1255  1.250.6.2  dyoung 	return (0);
   1256  1.250.6.2  dyoung }
   1257  1.250.6.2  dyoung 
   1258  1.250.6.2  dyoung /*
   1259  1.250.6.2  dyoung  * Free a fragment reassembly header and all
   1260  1.250.6.2  dyoung  * associated datagrams.
   1261  1.250.6.2  dyoung  */
   1262  1.250.6.2  dyoung void
   1263  1.250.6.2  dyoung ip_freef(struct ipq *fp)
   1264  1.250.6.2  dyoung {
   1265  1.250.6.2  dyoung 	struct ipqent *q, *p;
   1266  1.250.6.2  dyoung 	u_int nfrags = 0;
   1267  1.250.6.2  dyoung 	int s;
   1268  1.250.6.2  dyoung 
   1269  1.250.6.2  dyoung 	IPQ_LOCK_CHECK();
   1270  1.250.6.2  dyoung 
   1271  1.250.6.2  dyoung 	for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
   1272  1.250.6.2  dyoung 		p = TAILQ_NEXT(q, ipqe_q);
   1273  1.250.6.2  dyoung 		m_freem(q->ipqe_m);
   1274  1.250.6.2  dyoung 		nfrags++;
   1275  1.250.6.2  dyoung 		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
   1276  1.250.6.2  dyoung 		s = splvm();
   1277  1.250.6.2  dyoung 		pool_put(&ipqent_pool, q);
   1278  1.250.6.2  dyoung 		splx(s);
   1279  1.250.6.2  dyoung 	}
   1280  1.250.6.2  dyoung 
   1281  1.250.6.2  dyoung 	if (nfrags != fp->ipq_nfrags)
   1282  1.250.6.2  dyoung 	    printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
   1283  1.250.6.2  dyoung 	ip_nfrags -= nfrags;
   1284  1.250.6.2  dyoung 	LIST_REMOVE(fp, ipq_q);
   1285  1.250.6.2  dyoung 	FREE(fp, M_FTABLE);
   1286  1.250.6.2  dyoung 	ip_nfragpackets--;
   1287  1.250.6.2  dyoung }
   1288  1.250.6.2  dyoung 
   1289  1.250.6.2  dyoung /*
   1290  1.250.6.2  dyoung  * IP reassembly TTL machinery for  multiplicative drop.
   1291  1.250.6.2  dyoung  */
   1292  1.250.6.2  dyoung static u_int	fragttl_histo[(IPFRAGTTL+1)];
   1293  1.250.6.2  dyoung 
   1294  1.250.6.2  dyoung 
   1295  1.250.6.2  dyoung /*
   1296  1.250.6.2  dyoung  * Decrement TTL of all reasembly queue entries by `ticks'.
   1297  1.250.6.2  dyoung  * Count number of distinct fragments (as opposed to partial, fragmented
   1298  1.250.6.2  dyoung  * datagrams) in the reassembly queue.  While we  traverse the entire
   1299  1.250.6.2  dyoung  * reassembly queue, compute and return the median TTL over all fragments.
   1300  1.250.6.2  dyoung  */
   1301  1.250.6.2  dyoung static u_int
   1302  1.250.6.2  dyoung ip_reass_ttl_decr(u_int ticks)
   1303  1.250.6.2  dyoung {
   1304  1.250.6.2  dyoung 	u_int nfrags, median, dropfraction, keepfraction;
   1305  1.250.6.2  dyoung 	struct ipq *fp, *nfp;
   1306  1.250.6.2  dyoung 	int i;
   1307  1.250.6.2  dyoung 
   1308  1.250.6.2  dyoung 	nfrags = 0;
   1309  1.250.6.2  dyoung 	memset(fragttl_histo, 0, sizeof fragttl_histo);
   1310  1.250.6.2  dyoung 
   1311  1.250.6.2  dyoung 	for (i = 0; i < IPREASS_NHASH; i++) {
   1312  1.250.6.2  dyoung 		for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) {
   1313  1.250.6.2  dyoung 			fp->ipq_ttl = ((fp->ipq_ttl  <= ticks) ?
   1314  1.250.6.2  dyoung 				       0 : fp->ipq_ttl - ticks);
   1315  1.250.6.2  dyoung 			nfp = LIST_NEXT(fp, ipq_q);
   1316  1.250.6.2  dyoung 			if (fp->ipq_ttl == 0) {
   1317  1.250.6.2  dyoung 				ipstat.ips_fragtimeout++;
   1318  1.250.6.2  dyoung 				ip_freef(fp);
   1319  1.250.6.2  dyoung 			} else {
   1320  1.250.6.2  dyoung 				nfrags += fp->ipq_nfrags;
   1321  1.250.6.2  dyoung 				fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
   1322  1.250.6.2  dyoung 			}
   1323  1.250.6.2  dyoung 		}
   1324  1.250.6.2  dyoung 	}
   1325  1.250.6.2  dyoung 
   1326  1.250.6.2  dyoung 	KASSERT(ip_nfrags == nfrags);
   1327  1.250.6.2  dyoung 
   1328  1.250.6.2  dyoung 	/* Find median (or other drop fraction) in histogram. */
   1329  1.250.6.2  dyoung 	dropfraction = (ip_nfrags / 2);
   1330  1.250.6.2  dyoung 	keepfraction = ip_nfrags - dropfraction;
   1331  1.250.6.2  dyoung 	for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
   1332  1.250.6.2  dyoung 		median +=  fragttl_histo[i];
   1333  1.250.6.2  dyoung 		if (median >= keepfraction)
   1334  1.250.6.2  dyoung 			break;
   1335  1.250.6.2  dyoung 	}
   1336  1.250.6.2  dyoung 
   1337  1.250.6.2  dyoung 	/* Return TTL of median (or other fraction). */
   1338  1.250.6.2  dyoung 	return (u_int)i;
   1339  1.250.6.2  dyoung }
   1340  1.250.6.2  dyoung 
   1341  1.250.6.2  dyoung void
   1342  1.250.6.2  dyoung ip_reass_drophalf(void)
   1343  1.250.6.2  dyoung {
   1344  1.250.6.2  dyoung 
   1345  1.250.6.2  dyoung 	u_int median_ticks;
   1346  1.250.6.2  dyoung 	/*
   1347  1.250.6.2  dyoung 	 * Compute median TTL of all fragments, and count frags
   1348  1.250.6.2  dyoung 	 * with that TTL or lower (roughly half of all fragments).
   1349  1.250.6.2  dyoung 	 */
   1350  1.250.6.2  dyoung 	median_ticks = ip_reass_ttl_decr(0);
   1351  1.250.6.2  dyoung 
   1352  1.250.6.2  dyoung 	/* Drop half. */
   1353  1.250.6.2  dyoung 	median_ticks = ip_reass_ttl_decr(median_ticks);
   1354  1.250.6.2  dyoung 
   1355  1.250.6.2  dyoung }
   1356  1.250.6.2  dyoung 
   1357  1.250.6.2  dyoung /*
   1358  1.250.6.2  dyoung  * IP timer processing;
   1359  1.250.6.2  dyoung  * if a timer expires on a reassembly
   1360  1.250.6.2  dyoung  * queue, discard it.
   1361  1.250.6.2  dyoung  */
   1362  1.250.6.2  dyoung void
   1363  1.250.6.2  dyoung ip_slowtimo(void)
   1364  1.250.6.2  dyoung {
   1365  1.250.6.2  dyoung 	static u_int dropscanidx = 0;
   1366  1.250.6.2  dyoung 	u_int i;
   1367  1.250.6.2  dyoung 	u_int median_ttl;
   1368  1.250.6.2  dyoung 	int s = splsoftnet();
   1369  1.250.6.2  dyoung 
   1370  1.250.6.2  dyoung 	IPQ_LOCK();
   1371  1.250.6.2  dyoung 
   1372  1.250.6.2  dyoung 	/* Age TTL of all fragments by 1 tick .*/
   1373  1.250.6.2  dyoung 	median_ttl = ip_reass_ttl_decr(1);
   1374  1.250.6.2  dyoung 
   1375  1.250.6.2  dyoung 	/* make sure fragment limit is up-to-date */
   1376  1.250.6.2  dyoung 	CHECK_NMBCLUSTER_PARAMS();
   1377  1.250.6.2  dyoung 
   1378  1.250.6.2  dyoung 	/* If we have too many fragments, drop the older half. */
   1379  1.250.6.2  dyoung 	if (ip_nfrags > ip_maxfrags)
   1380  1.250.6.2  dyoung 		ip_reass_ttl_decr(median_ttl);
   1381  1.250.6.2  dyoung 
   1382  1.250.6.2  dyoung 	/*
   1383  1.250.6.2  dyoung 	 * If we are over the maximum number of fragmented packets
   1384  1.250.6.2  dyoung 	 * (due to the limit being lowered), drain off
   1385  1.250.6.2  dyoung 	 * enough to get down to the new limit. Start draining
   1386  1.250.6.2  dyoung 	 * from the reassembly hashqueue most recently drained.
   1387  1.250.6.2  dyoung 	 */
   1388  1.250.6.2  dyoung 	if (ip_maxfragpackets < 0)
   1389  1.250.6.2  dyoung 		;
   1390  1.250.6.2  dyoung 	else {
   1391  1.250.6.2  dyoung 		int wrapped = 0;
   1392  1.250.6.2  dyoung 
   1393  1.250.6.2  dyoung 		i = dropscanidx;
   1394  1.250.6.2  dyoung 		while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
   1395  1.250.6.2  dyoung 			while (LIST_FIRST(&ipq[i]) != NULL)
   1396  1.250.6.2  dyoung 				ip_freef(LIST_FIRST(&ipq[i]));
   1397  1.250.6.2  dyoung 			if (++i >= IPREASS_NHASH) {
   1398  1.250.6.2  dyoung 				i = 0;
   1399  1.250.6.2  dyoung 			}
   1400  1.250.6.2  dyoung 			/*
   1401  1.250.6.2  dyoung 			 * Dont scan forever even if fragment counters are
   1402  1.250.6.2  dyoung 			 * wrong: stop after scanning entire reassembly queue.
   1403  1.250.6.2  dyoung 			 */
   1404  1.250.6.2  dyoung 			if (i == dropscanidx)
   1405  1.250.6.2  dyoung 			    wrapped = 1;
   1406  1.250.6.2  dyoung 		}
   1407  1.250.6.2  dyoung 		dropscanidx = i;
   1408  1.250.6.2  dyoung 	}
   1409  1.250.6.2  dyoung 	IPQ_UNLOCK();
   1410  1.250.6.2  dyoung 	splx(s);
   1411  1.250.6.2  dyoung }
   1412  1.250.6.2  dyoung 
/*
 * Drain off datagram fragments: each call drops about half of what is
 * currently queued for reassembly.
 */
void
ip_drain(void)
{

	/*
	 * This may be called from a device's interrupt context, so the
	 * ipq lock is only tried, never waited for.  If it is already
	 * busy, do nothing.
	 */
	if (ipq_lock_try() != 0) {
		/*
		 * Drop half the total fragments now.  If more mbufs are
		 * needed, we will be called again soon.
		 */
		ip_reass_drophalf();

		IPQ_UNLOCK();
	}
}
   1435  1.250.6.2  dyoung 
   1436  1.250.6.2  dyoung /*
   1437  1.250.6.2  dyoung  * Do option processing on a datagram,
   1438  1.250.6.2  dyoung  * possibly discarding it if bad options are encountered,
   1439  1.250.6.2  dyoung  * or forwarding it if source-routed.
   1440  1.250.6.2  dyoung  * Returns 1 if packet has been forwarded/freed,
   1441  1.250.6.2  dyoung  * 0 if the packet should be processed further.
   1442  1.250.6.2  dyoung  */
   1443  1.250.6.2  dyoung int
   1444  1.250.6.2  dyoung ip_dooptions(struct mbuf *m)
   1445  1.250.6.2  dyoung {
   1446  1.250.6.2  dyoung 	struct ip *ip = mtod(m, struct ip *);
   1447  1.250.6.2  dyoung 	u_char *cp, *cp0;
   1448  1.250.6.2  dyoung 	struct ip_timestamp *ipt;
   1449  1.250.6.2  dyoung 	struct in_ifaddr *ia;
	/*
	 * On every "goto bad", type/code are handed to icmp_error().  type
	 * defaults to ICMP_PARAMPROB, in which case code is set to the byte
	 * offset of the offending field from the start of the IP header.
	 */
   1450  1.250.6.2  dyoung 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
   1451  1.250.6.2  dyoung 	struct in_addr dst;
   1452  1.250.6.2  dyoung 	n_time ntime;
   1453  1.250.6.2  dyoung 
	/*
	 * Keep a copy of the destination as received; source-route
	 * processing below may overwrite ip->ip_dst.
	 */
   1454  1.250.6.2  dyoung 	dst = ip->ip_dst;
	/* Options start right after the fixed header; cnt is their size. */
   1455  1.250.6.2  dyoung 	cp = (u_char *)(ip + 1);
   1456  1.250.6.2  dyoung 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	/* Each iteration handles one option and then steps over its optlen bytes. */
   1457  1.250.6.2  dyoung 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
   1458  1.250.6.2  dyoung 		opt = cp[IPOPT_OPTVAL];
   1459  1.250.6.2  dyoung 		if (opt == IPOPT_EOL)
   1460  1.250.6.2  dyoung 			break;
   1461  1.250.6.2  dyoung 		if (opt == IPOPT_NOP)
   1462  1.250.6.2  dyoung 			optlen = 1;
   1463  1.250.6.2  dyoung 		else {
			/*
			 * Every other option carries a length byte: it must
			 * be present, must cover at least the type and
			 * length bytes, and must not run past the options.
			 */
   1464  1.250.6.2  dyoung 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
   1465  1.250.6.2  dyoung 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1466  1.250.6.2  dyoung 				goto bad;
   1467  1.250.6.2  dyoung 			}
   1468  1.250.6.2  dyoung 			optlen = cp[IPOPT_OLEN];
   1469  1.250.6.2  dyoung 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
   1470  1.250.6.2  dyoung 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1471  1.250.6.2  dyoung 				goto bad;
   1472  1.250.6.2  dyoung 			}
   1473  1.250.6.2  dyoung 		}
   1474  1.250.6.2  dyoung 		switch (opt) {
   1475  1.250.6.2  dyoung 
		/* Options not handled below are stepped over untouched. */
   1476  1.250.6.2  dyoung 		default:
   1477  1.250.6.2  dyoung 			break;
   1478  1.250.6.2  dyoung 
   1479  1.250.6.2  dyoung 		/*
   1480  1.250.6.2  dyoung 		 * Source routing with record.
   1481  1.250.6.2  dyoung 		 * Find interface with current destination address.
   1482  1.250.6.2  dyoung 		 * If none on this machine then drop if strictly routed,
   1483  1.250.6.2  dyoung 		 * or do nothing if loosely routed.
   1484  1.250.6.2  dyoung 		 * Record interface address and bring up next address
   1485  1.250.6.2  dyoung 		 * component.  If strictly routed make sure next
   1486  1.250.6.2  dyoung 		 * address is on directly accessible net.
   1487  1.250.6.2  dyoung 		 */
   1488  1.250.6.2  dyoung 		case IPOPT_LSRR:
   1489  1.250.6.2  dyoung 		case IPOPT_SSRR:
			/* Source-routed packets are refused when ip_allowsrcrt is 0. */
   1490  1.250.6.2  dyoung 			if (ip_allowsrcrt == 0) {
   1491  1.250.6.2  dyoung 				type = ICMP_UNREACH;
   1492  1.250.6.2  dyoung 				code = ICMP_UNREACH_NET_PROHIB;
   1493  1.250.6.2  dyoung 				goto bad;
   1494  1.250.6.2  dyoung 			}
			/* The option must be long enough to hold its offset byte. */
   1495  1.250.6.2  dyoung 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1496  1.250.6.2  dyoung 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1497  1.250.6.2  dyoung 				goto bad;
   1498  1.250.6.2  dyoung 			}
   1499  1.250.6.2  dyoung 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1500  1.250.6.2  dyoung 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1501  1.250.6.2  dyoung 				goto bad;
   1502  1.250.6.2  dyoung 			}
			/*
			 * ipaddr is not a local of this function; it is used
			 * here as scratch for the address lookups.
			 */
   1503  1.250.6.2  dyoung 			ipaddr.sin_addr = ip->ip_dst;
   1504  1.250.6.2  dyoung 			ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
   1505  1.250.6.2  dyoung 			if (ia == 0) {
   1506  1.250.6.2  dyoung 				if (opt == IPOPT_SSRR) {
   1507  1.250.6.2  dyoung 					type = ICMP_UNREACH;
   1508  1.250.6.2  dyoung 					code = ICMP_UNREACH_SRCFAIL;
   1509  1.250.6.2  dyoung 					goto bad;
   1510  1.250.6.2  dyoung 				}
   1511  1.250.6.2  dyoung 				/*
   1512  1.250.6.2  dyoung 				 * Loose routing, and not at next destination
   1513  1.250.6.2  dyoung 				 * yet; nothing to do except forward.
   1514  1.250.6.2  dyoung 				 */
   1515  1.250.6.2  dyoung 				break;
   1516  1.250.6.2  dyoung 			}
   1517  1.250.6.2  dyoung 			off--;			/* 0 origin */
   1518  1.250.6.2  dyoung 			if ((off + sizeof(struct in_addr)) > optlen) {
   1519  1.250.6.2  dyoung 				/*
   1520  1.250.6.2  dyoung 				 * End of source route.  Should be for us.
   1521  1.250.6.2  dyoung 				 */
   1522  1.250.6.2  dyoung 				save_rte(cp, ip->ip_src);
   1523  1.250.6.2  dyoung 				break;
   1524  1.250.6.2  dyoung 			}
   1525  1.250.6.2  dyoung 			/*
   1526  1.250.6.2  dyoung 			 * locate outgoing interface
   1527  1.250.6.2  dyoung 			 */
   1528  1.250.6.2  dyoung 			bcopy((void *)(cp + off), (void *)&ipaddr.sin_addr,
   1529  1.250.6.2  dyoung 			    sizeof(ipaddr.sin_addr));
   1530  1.250.6.2  dyoung 			if (opt == IPOPT_SSRR)
   1531  1.250.6.2  dyoung 				ia = ifatoia(ifa_ifwithladdr(sintosa(&ipaddr)));
   1532  1.250.6.2  dyoung 			else
   1533  1.250.6.2  dyoung 				ia = ip_rtaddr(ipaddr.sin_addr);
   1534  1.250.6.2  dyoung 			if (ia == 0) {
   1535  1.250.6.2  dyoung 				type = ICMP_UNREACH;
   1536  1.250.6.2  dyoung 				code = ICMP_UNREACH_SRCFAIL;
   1537  1.250.6.2  dyoung 				goto bad;
   1538  1.250.6.2  dyoung 			}
			/*
			 * The next hop becomes the packet's destination, the
			 * slot it occupied is overwritten with the address
			 * of the interface found above, and the option's
			 * offset moves on to the following slot.
			 */
   1539  1.250.6.2  dyoung 			ip->ip_dst = ipaddr.sin_addr;
   1540  1.250.6.2  dyoung 			bcopy((void *)&ia->ia_addr.sin_addr,
   1541  1.250.6.2  dyoung 			    (void *)(cp + off), sizeof(struct in_addr));
   1542  1.250.6.2  dyoung 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1543  1.250.6.2  dyoung 			/*
   1544  1.250.6.2  dyoung 			 * Let ip_intr's mcast routing check handle mcast pkts
   1545  1.250.6.2  dyoung 			 */
   1546  1.250.6.2  dyoung 			forward = !IN_MULTICAST(ip->ip_dst.s_addr);
   1547  1.250.6.2  dyoung 			break;
   1548  1.250.6.2  dyoung 
		/* Record route: store an interface address in the next free slot. */
   1549  1.250.6.2  dyoung 		case IPOPT_RR:
   1550  1.250.6.2  dyoung 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1551  1.250.6.2  dyoung 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1552  1.250.6.2  dyoung 				goto bad;
   1553  1.250.6.2  dyoung 			}
   1554  1.250.6.2  dyoung 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1555  1.250.6.2  dyoung 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1556  1.250.6.2  dyoung 				goto bad;
   1557  1.250.6.2  dyoung 			}
   1558  1.250.6.2  dyoung 			/*
   1559  1.250.6.2  dyoung 			 * If no space remains, ignore.
   1560  1.250.6.2  dyoung 			 */
   1561  1.250.6.2  dyoung 			off--;			/* 0 origin */
   1562  1.250.6.2  dyoung 			if ((off + sizeof(struct in_addr)) > optlen)
   1563  1.250.6.2  dyoung 				break;
   1564  1.250.6.2  dyoung 			bcopy((void *)(&ip->ip_dst), (void *)&ipaddr.sin_addr,
   1565  1.250.6.2  dyoung 			    sizeof(ipaddr.sin_addr));
   1566  1.250.6.2  dyoung 			/*
   1567  1.250.6.2  dyoung 			 * locate outgoing interface; if we're the destination,
   1568  1.250.6.2  dyoung 			 * use the incoming interface (should be same).
   1569  1.250.6.2  dyoung 			 */
   1570  1.250.6.2  dyoung 			if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
   1571  1.250.6.2  dyoung 			    == NULL &&
   1572  1.250.6.2  dyoung 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
   1573  1.250.6.2  dyoung 				type = ICMP_UNREACH;
   1574  1.250.6.2  dyoung 				code = ICMP_UNREACH_HOST;
   1575  1.250.6.2  dyoung 				goto bad;
   1576  1.250.6.2  dyoung 			}
   1577  1.250.6.2  dyoung 			bcopy((void *)&ia->ia_addr.sin_addr,
   1578  1.250.6.2  dyoung 			    (void *)(cp + off), sizeof(struct in_addr));
   1579  1.250.6.2  dyoung 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1580  1.250.6.2  dyoung 			break;
   1581  1.250.6.2  dyoung 
		/*
		 * Timestamp.  The inner switch on ipt_flg decides whether an
		 * address accompanies the timestamp; the timestamp itself is
		 * written after that switch.
		 */
   1582  1.250.6.2  dyoung 		case IPOPT_TS:
   1583  1.250.6.2  dyoung 			code = cp - (u_char *)ip;
   1584  1.250.6.2  dyoung 			ipt = (struct ip_timestamp *)cp;
   1585  1.250.6.2  dyoung 			if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
   1586  1.250.6.2  dyoung 				code = (u_char *)&ipt->ipt_len - (u_char *)ip;
   1587  1.250.6.2  dyoung 				goto bad;
   1588  1.250.6.2  dyoung 			}
   1589  1.250.6.2  dyoung 			if (ipt->ipt_ptr < 5) {
   1590  1.250.6.2  dyoung 				code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
   1591  1.250.6.2  dyoung 				goto bad;
   1592  1.250.6.2  dyoung 			}
			/*
			 * No room left for another entry: bump the overflow
			 * counter instead, and treat the option as bad only
			 * if that counter wraps around to zero.
			 */
   1593  1.250.6.2  dyoung 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
   1594  1.250.6.2  dyoung 				if (++ipt->ipt_oflw == 0) {
   1595  1.250.6.2  dyoung 					code = (u_char *)&ipt->ipt_ptr -
   1596  1.250.6.2  dyoung 					    (u_char *)ip;
   1597  1.250.6.2  dyoung 					goto bad;
   1598  1.250.6.2  dyoung 				}
   1599  1.250.6.2  dyoung 				break;
   1600  1.250.6.2  dyoung 			}
			/* cp0: where the next entry goes (ipt_ptr is 1-origin). */
   1601  1.250.6.2  dyoung 			cp0 = (cp + ipt->ipt_ptr - 1);
   1602  1.250.6.2  dyoung 			switch (ipt->ipt_flg) {
   1603  1.250.6.2  dyoung 
   1604  1.250.6.2  dyoung 			case IPOPT_TS_TSONLY:
   1605  1.250.6.2  dyoung 				break;
   1606  1.250.6.2  dyoung 
   1607  1.250.6.2  dyoung 			case IPOPT_TS_TSANDADDR:
   1608  1.250.6.2  dyoung 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1609  1.250.6.2  dyoung 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1610  1.250.6.2  dyoung 					code = (u_char *)&ipt->ipt_ptr -
   1611  1.250.6.2  dyoung 					    (u_char *)ip;
   1612  1.250.6.2  dyoung 					goto bad;
   1613  1.250.6.2  dyoung 				}
   1614  1.250.6.2  dyoung 				ipaddr.sin_addr = dst;
   1615  1.250.6.2  dyoung 				ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
   1616  1.250.6.2  dyoung 				    m->m_pkthdr.rcvif));
				/*
				 * "continue" applies to the enclosing for
				 * loop: no timestamp is written and parsing
				 * moves on to the next option.
				 */
   1617  1.250.6.2  dyoung 				if (ia == 0)
   1618  1.250.6.2  dyoung 					continue;
   1619  1.250.6.2  dyoung 				bcopy(&ia->ia_addr.sin_addr,
   1620  1.250.6.2  dyoung 				    cp0, sizeof(struct in_addr));
   1621  1.250.6.2  dyoung 				ipt->ipt_ptr += sizeof(struct in_addr);
   1622  1.250.6.2  dyoung 				break;
   1623  1.250.6.2  dyoung 
   1624  1.250.6.2  dyoung 			case IPOPT_TS_PRESPEC:
   1625  1.250.6.2  dyoung 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1626  1.250.6.2  dyoung 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1627  1.250.6.2  dyoung 					code = (u_char *)&ipt->ipt_ptr -
   1628  1.250.6.2  dyoung 					    (u_char *)ip;
   1629  1.250.6.2  dyoung 					goto bad;
   1630  1.250.6.2  dyoung 				}
				/*
				 * The address is already in the option; stamp
				 * it only if ifa_ifwithaddr() finds it,
				 * otherwise skip to the next option.
				 */
   1631  1.250.6.2  dyoung 				bcopy(cp0, &ipaddr.sin_addr,
   1632  1.250.6.2  dyoung 				    sizeof(struct in_addr));
   1633  1.250.6.2  dyoung 				if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
   1634  1.250.6.2  dyoung 				    == NULL)
   1635  1.250.6.2  dyoung 					continue;
   1636  1.250.6.2  dyoung 				ipt->ipt_ptr += sizeof(struct in_addr);
   1637  1.250.6.2  dyoung 				break;
   1638  1.250.6.2  dyoung 
   1639  1.250.6.2  dyoung 			default:
   1640  1.250.6.2  dyoung 				/* XXX can't take &ipt->ipt_flg */
   1641  1.250.6.2  dyoung 				code = (u_char *)&ipt->ipt_ptr -
   1642  1.250.6.2  dyoung 				    (u_char *)ip + 1;
   1643  1.250.6.2  dyoung 				goto bad;
   1644  1.250.6.2  dyoung 			}
			/*
			 * Write the timestamp at the (possibly just advanced)
			 * ipt_ptr position and step ipt_ptr past it.
			 */
   1645  1.250.6.2  dyoung 			ntime = iptime();
   1646  1.250.6.2  dyoung 			cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
   1647  1.250.6.2  dyoung 			memmove((char *)cp + ipt->ipt_ptr - 1, cp0,
   1648  1.250.6.2  dyoung 			    sizeof(n_time));
   1649  1.250.6.2  dyoung 			ipt->ipt_ptr += sizeof(n_time);
   1650  1.250.6.2  dyoung 		}
   1651  1.250.6.2  dyoung 	}
	/*
	 * forward was set by the source-route case when a non-multicast next
	 * hop was installed as the destination.  The packet is forwarded from
	 * here, unless ip_forwsrcrt is 0, in which case it is rejected.
	 */
   1652  1.250.6.2  dyoung 	if (forward) {
   1653  1.250.6.2  dyoung 		if (ip_forwsrcrt == 0) {
   1654  1.250.6.2  dyoung 			type = ICMP_UNREACH;
   1655  1.250.6.2  dyoung 			code = ICMP_UNREACH_SRCFAIL;
   1656  1.250.6.2  dyoung 			goto bad;
   1657  1.250.6.2  dyoung 		}
   1658  1.250.6.2  dyoung 		ip_forward(m, 1);
   1659  1.250.6.2  dyoung 		return (1);
   1660  1.250.6.2  dyoung 	}
   1661  1.250.6.2  dyoung 	return (0);
	/* Common error exit: hand the mbuf to icmp_error() and count the bad option. */
   1662  1.250.6.2  dyoung bad:
   1663  1.250.6.2  dyoung 	icmp_error(m, type, code, 0, 0);
   1664  1.250.6.2  dyoung 	ipstat.ips_badoptions++;
   1665  1.250.6.2  dyoung 	return (1);
   1666  1.250.6.2  dyoung }
   1667  1.250.6.2  dyoung 
   1668  1.250.6.2  dyoung /*
   1669  1.250.6.2  dyoung  * Given address of next destination (final or next hop),
   1670  1.250.6.2  dyoung  * return internet address info of interface to be used to get there.
   1671  1.250.6.2  dyoung  */
   1672  1.250.6.2  dyoung struct in_ifaddr *
   1673  1.250.6.2  dyoung ip_rtaddr(struct in_addr dst)
   1674  1.250.6.2  dyoung {
   1675  1.250.6.2  dyoung 	struct rtentry *rt;
   1676  1.250.6.2  dyoung 	union {
   1677  1.250.6.2  dyoung 		struct sockaddr		dst;
   1678  1.250.6.2  dyoung 		struct sockaddr_in	dst4;
   1679  1.250.6.2  dyoung 	} u;
   1680  1.250.6.2  dyoung 
   1681  1.250.6.2  dyoung 	sockaddr_in_init(&u.dst4, &dst, 0);
   1682  1.250.6.2  dyoung 
   1683  1.250.6.2  dyoung 	if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL)
   1684  1.250.6.2  dyoung 		return NULL;
   1685  1.250.6.2  dyoung 
   1686  1.250.6.2  dyoung 	return ifatoia(rt->rt_ifa);
   1687  1.250.6.2  dyoung }
   1688  1.250.6.2  dyoung 
   1689  1.250.6.2  dyoung /*
   1690  1.250.6.2  dyoung  * Save incoming source route for use in replies,
   1691  1.250.6.2  dyoung  * to be picked up later by ip_srcroute if the receiver is interested.
   1692  1.250.6.2  dyoung  */
   1693  1.250.6.2  dyoung void
   1694  1.250.6.2  dyoung save_rte(u_char *option, struct in_addr dst)
   1695  1.250.6.2  dyoung {
   1696  1.250.6.2  dyoung 	unsigned olen;
   1697  1.250.6.2  dyoung 
   1698  1.250.6.2  dyoung 	olen = option[IPOPT_OLEN];
   1699  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1700  1.250.6.2  dyoung 	if (ipprintfs)
   1701  1.250.6.2  dyoung 		printf("save_rte: olen %d\n", olen);
   1702  1.250.6.2  dyoung #endif /* 0 */
   1703  1.250.6.2  dyoung 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
   1704  1.250.6.2  dyoung 		return;
   1705  1.250.6.2  dyoung 	bcopy((void *)option, (void *)ip_srcrt.srcopt, olen);
   1706  1.250.6.2  dyoung 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
   1707  1.250.6.2  dyoung 	ip_srcrt.dst = dst;
   1708  1.250.6.2  dyoung }
   1709  1.250.6.2  dyoung 
   1710  1.250.6.2  dyoung /*
   1711  1.250.6.2  dyoung  * Retrieve incoming source route for use in replies,
   1712  1.250.6.2  dyoung  * in the same form used by setsockopt.
   1713  1.250.6.2  dyoung  * The first hop is placed before the options, will be removed later.
   1714  1.250.6.2  dyoung  */
   1715  1.250.6.2  dyoung struct mbuf *
   1716  1.250.6.2  dyoung ip_srcroute(void)
   1717  1.250.6.2  dyoung {
   1718  1.250.6.2  dyoung 	struct in_addr *p, *q;
   1719  1.250.6.2  dyoung 	struct mbuf *m;
   1720  1.250.6.2  dyoung 
   1721  1.250.6.2  dyoung 	if (ip_nhops == 0)
   1722  1.250.6.2  dyoung 		return NULL;
   1723  1.250.6.2  dyoung 	m = m_get(M_DONTWAIT, MT_SOOPTS);
   1724  1.250.6.2  dyoung 	if (m == 0)
   1725  1.250.6.2  dyoung 		return NULL;
   1726  1.250.6.2  dyoung 
   1727  1.250.6.2  dyoung 	MCLAIM(m, &inetdomain.dom_mowner);
   1728  1.250.6.2  dyoung #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
   1729  1.250.6.2  dyoung 
   1730  1.250.6.2  dyoung 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
   1731  1.250.6.2  dyoung 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
   1732  1.250.6.2  dyoung 	    OPTSIZ;
   1733  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1734  1.250.6.2  dyoung 	if (ipprintfs)
   1735  1.250.6.2  dyoung 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
   1736  1.250.6.2  dyoung #endif
   1737  1.250.6.2  dyoung 
   1738  1.250.6.2  dyoung 	/*
   1739  1.250.6.2  dyoung 	 * First save first hop for return route
   1740  1.250.6.2  dyoung 	 */
   1741  1.250.6.2  dyoung 	p = &ip_srcrt.route[ip_nhops - 1];
   1742  1.250.6.2  dyoung 	*(mtod(m, struct in_addr *)) = *p--;
   1743  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1744  1.250.6.2  dyoung 	if (ipprintfs)
   1745  1.250.6.2  dyoung 		printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
   1746  1.250.6.2  dyoung #endif
   1747  1.250.6.2  dyoung 
   1748  1.250.6.2  dyoung 	/*
   1749  1.250.6.2  dyoung 	 * Copy option fields and padding (nop) to mbuf.
   1750  1.250.6.2  dyoung 	 */
   1751  1.250.6.2  dyoung 	ip_srcrt.nop = IPOPT_NOP;
   1752  1.250.6.2  dyoung 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
   1753  1.250.6.2  dyoung 	memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop,
   1754  1.250.6.2  dyoung 	    OPTSIZ);
   1755  1.250.6.2  dyoung 	q = (struct in_addr *)(mtod(m, char *) +
   1756  1.250.6.2  dyoung 	    sizeof(struct in_addr) + OPTSIZ);
   1757  1.250.6.2  dyoung #undef OPTSIZ
   1758  1.250.6.2  dyoung 	/*
   1759  1.250.6.2  dyoung 	 * Record return path as an IP source route,
   1760  1.250.6.2  dyoung 	 * reversing the path (pointers are now aligned).
   1761  1.250.6.2  dyoung 	 */
   1762  1.250.6.2  dyoung 	while (p >= ip_srcrt.route) {
   1763  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1764  1.250.6.2  dyoung 		if (ipprintfs)
   1765  1.250.6.2  dyoung 			printf(" %x", ntohl(q->s_addr));
   1766  1.250.6.2  dyoung #endif
   1767  1.250.6.2  dyoung 		*q++ = *p--;
   1768  1.250.6.2  dyoung 	}
   1769  1.250.6.2  dyoung 	/*
   1770  1.250.6.2  dyoung 	 * Last hop goes to final destination.
   1771  1.250.6.2  dyoung 	 */
   1772  1.250.6.2  dyoung 	*q = ip_srcrt.dst;
   1773  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1774  1.250.6.2  dyoung 	if (ipprintfs)
   1775  1.250.6.2  dyoung 		printf(" %x\n", ntohl(q->s_addr));
   1776  1.250.6.2  dyoung #endif
   1777  1.250.6.2  dyoung 	return (m);
   1778  1.250.6.2  dyoung }
   1779  1.250.6.2  dyoung 
   1780  1.250.6.2  dyoung /*
   1781  1.250.6.2  dyoung  * Strip out IP options, at higher
   1782  1.250.6.2  dyoung  * level protocol in the kernel.
   1783  1.250.6.2  dyoung  * Second argument is buffer to which options
   1784  1.250.6.2  dyoung  * will be moved, and return value is their length.
   1785  1.250.6.2  dyoung  * XXX should be deleted; last arg currently ignored.
   1786  1.250.6.2  dyoung  */
   1787  1.250.6.2  dyoung void
   1788  1.250.6.2  dyoung ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
   1789  1.250.6.2  dyoung {
   1790  1.250.6.2  dyoung 	int i;
   1791  1.250.6.2  dyoung 	struct ip *ip = mtod(m, struct ip *);
   1792  1.250.6.2  dyoung 	void *opts;
   1793  1.250.6.2  dyoung 	int olen;
   1794  1.250.6.2  dyoung 
   1795  1.250.6.2  dyoung 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
   1796  1.250.6.2  dyoung 	opts = (void *)(ip + 1);
   1797  1.250.6.2  dyoung 	i = m->m_len - (sizeof (struct ip) + olen);
   1798  1.250.6.2  dyoung 	memmove(opts, (char *)opts + olen, (unsigned)i);
   1799  1.250.6.2  dyoung 	m->m_len -= olen;
   1800  1.250.6.2  dyoung 	if (m->m_flags & M_PKTHDR)
   1801  1.250.6.2  dyoung 		m->m_pkthdr.len -= olen;
   1802  1.250.6.2  dyoung 	ip->ip_len = htons(ntohs(ip->ip_len) - olen);
   1803  1.250.6.2  dyoung 	ip->ip_hl = sizeof (struct ip) >> 2;
   1804  1.250.6.2  dyoung }
   1805  1.250.6.2  dyoung 
   1806  1.250.6.2  dyoung const int inetctlerrmap[PRC_NCMDS] = {
   1807  1.250.6.2  dyoung 	0,		0,		0,		0,
   1808  1.250.6.2  dyoung 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
   1809  1.250.6.2  dyoung 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
   1810  1.250.6.2  dyoung 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
   1811  1.250.6.2  dyoung 	0,		0,		0,		0,
   1812  1.250.6.2  dyoung 	ENOPROTOOPT
   1813  1.250.6.2  dyoung };
   1814  1.250.6.2  dyoung 
   1815  1.250.6.2  dyoung /*
   1816  1.250.6.2  dyoung  * Forward a packet.  If some error occurs return the sender
   1817  1.250.6.2  dyoung  * an icmp packet.  Note we can't always generate a meaningful
   1818  1.250.6.2  dyoung  * icmp message because icmp doesn't have a large enough repertoire
   1819  1.250.6.2  dyoung  * of codes and types.
   1820  1.250.6.2  dyoung  *
   1821  1.250.6.2  dyoung  * If not forwarding, just drop the packet.  This could be confusing
   1822  1.250.6.2  dyoung  * if ipforwarding was zero but some routing protocol was advancing
   1823  1.250.6.2  dyoung  * us as a gateway to somewhere.  However, we must let the routing
   1824  1.250.6.2  dyoung  * protocol deal with that.
   1825  1.250.6.2  dyoung  *
   1826  1.250.6.2  dyoung  * The srcrt parameter indicates whether the packet is being forwarded
   1827  1.250.6.2  dyoung  * via a source route.
   1828  1.250.6.2  dyoung  */
   1829  1.250.6.2  dyoung void
   1830  1.250.6.2  dyoung ip_forward(struct mbuf *m, int srcrt)
   1831  1.250.6.2  dyoung {
   1832  1.250.6.2  dyoung 	struct ip *ip = mtod(m, struct ip *);
   1833  1.250.6.2  dyoung 	struct rtentry *rt;
   1834  1.250.6.2  dyoung 	int error, type = 0, code = 0, destmtu = 0;
   1835  1.250.6.2  dyoung 	struct mbuf *mcopy;
   1836  1.250.6.2  dyoung 	n_long dest;
   1837  1.250.6.2  dyoung 	union {
   1838  1.250.6.2  dyoung 		struct sockaddr		dst;
   1839  1.250.6.2  dyoung 		struct sockaddr_in	dst4;
   1840  1.250.6.2  dyoung 	} u;
   1841  1.250.6.2  dyoung 
   1842  1.250.6.2  dyoung 	/*
   1843  1.250.6.2  dyoung 	 * We are now in the output path.
   1844  1.250.6.2  dyoung 	 */
   1845  1.250.6.2  dyoung 	MCLAIM(m, &ip_tx_mowner);
   1846  1.250.6.2  dyoung 
   1847  1.250.6.2  dyoung 	/*
   1848  1.250.6.2  dyoung 	 * Clear any in-bound checksum flags for this packet.
   1849  1.250.6.2  dyoung 	 */
   1850  1.250.6.2  dyoung 	m->m_pkthdr.csum_flags = 0;
   1851  1.250.6.2  dyoung 
   1852  1.250.6.2  dyoung 	dest = 0;
   1853  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1854  1.250.6.2  dyoung 	if (ipprintfs) {
   1855  1.250.6.2  dyoung 		printf("forward: src %s ", inet_ntoa(ip->ip_src));
   1856  1.250.6.2  dyoung 		printf("dst %s ttl %x\n", inet_ntoa(ip->ip_dst), ip->ip_ttl);
   1857  1.250.6.2  dyoung 	}
   1858  1.250.6.2  dyoung #endif
   1859  1.250.6.2  dyoung 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
   1860  1.250.6.2  dyoung 		ipstat.ips_cantforward++;
   1861  1.250.6.2  dyoung 		m_freem(m);
   1862  1.250.6.2  dyoung 		return;
   1863  1.250.6.2  dyoung 	}
   1864  1.250.6.2  dyoung 	if (ip->ip_ttl <= IPTTLDEC) {
   1865  1.250.6.2  dyoung 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
   1866  1.250.6.2  dyoung 		return;
   1867  1.250.6.2  dyoung 	}
   1868  1.250.6.2  dyoung 
   1869  1.250.6.2  dyoung 	sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
   1870  1.250.6.2  dyoung 	if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) {
   1871  1.250.6.2  dyoung 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0);
   1872  1.250.6.2  dyoung 		return;
   1873  1.250.6.2  dyoung 	}
   1874  1.250.6.2  dyoung 
   1875  1.250.6.2  dyoung 	/*
   1876  1.250.6.2  dyoung 	 * Save at most 68 bytes of the packet in case
   1877  1.250.6.2  dyoung 	 * we need to generate an ICMP message to the src.
   1878  1.250.6.2  dyoung 	 * Pullup to avoid sharing mbuf cluster between m and mcopy.
   1879  1.250.6.2  dyoung 	 */
   1880  1.250.6.2  dyoung 	mcopy = m_copym(m, 0, imin(ntohs(ip->ip_len), 68), M_DONTWAIT);
   1881  1.250.6.2  dyoung 	if (mcopy)
   1882  1.250.6.2  dyoung 		mcopy = m_pullup(mcopy, ip->ip_hl << 2);
   1883  1.250.6.2  dyoung 
   1884  1.250.6.2  dyoung 	ip->ip_ttl -= IPTTLDEC;
   1885  1.250.6.2  dyoung 
   1886  1.250.6.2  dyoung 	/*
   1887  1.250.6.2  dyoung 	 * If forwarding packet using same interface that it came in on,
   1888  1.250.6.2  dyoung 	 * perhaps should send a redirect to sender to shortcut a hop.
   1889  1.250.6.2  dyoung 	 * Only send redirect if source is sending directly to us,
   1890  1.250.6.2  dyoung 	 * and if packet was not source routed (or has any options).
   1891  1.250.6.2  dyoung 	 * Also, don't send redirect if forwarding using a default route
   1892  1.250.6.2  dyoung 	 * or a route modified by a redirect.
   1893  1.250.6.2  dyoung 	 */
   1894  1.250.6.2  dyoung 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
   1895  1.250.6.2  dyoung 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
   1896  1.250.6.2  dyoung 	    !in_nullhost(satocsin(rt_getkey(rt))->sin_addr) &&
   1897  1.250.6.2  dyoung 	    ipsendredirects && !srcrt) {
   1898  1.250.6.2  dyoung 		if (rt->rt_ifa &&
   1899  1.250.6.2  dyoung 		    (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
   1900  1.250.6.2  dyoung 		    ifatoia(rt->rt_ifa)->ia_subnet) {
   1901  1.250.6.2  dyoung 			if (rt->rt_flags & RTF_GATEWAY)
   1902  1.250.6.2  dyoung 				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
   1903  1.250.6.2  dyoung 			else
   1904  1.250.6.2  dyoung 				dest = ip->ip_dst.s_addr;
   1905  1.250.6.2  dyoung 			/*
   1906  1.250.6.2  dyoung 			 * Router requirements says to only send host
   1907  1.250.6.2  dyoung 			 * redirects.
   1908  1.250.6.2  dyoung 			 */
   1909  1.250.6.2  dyoung 			type = ICMP_REDIRECT;
   1910  1.250.6.2  dyoung 			code = ICMP_REDIRECT_HOST;
   1911  1.250.6.2  dyoung #ifdef DIAGNOSTIC
   1912  1.250.6.2  dyoung 			if (ipprintfs)
   1913  1.250.6.2  dyoung 				printf("redirect (%d) to %x\n", code,
   1914  1.250.6.2  dyoung 				    (u_int32_t)dest);
   1915  1.250.6.2  dyoung #endif
   1916  1.250.6.2  dyoung 		}
   1917  1.250.6.2  dyoung 	}
   1918  1.250.6.2  dyoung 
   1919  1.250.6.2  dyoung 	error = ip_output(m, NULL, &ipforward_rt,
   1920  1.250.6.2  dyoung 	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
   1921  1.250.6.2  dyoung 	    (struct ip_moptions *)NULL, (struct socket *)NULL);
   1922  1.250.6.2  dyoung 
   1923  1.250.6.2  dyoung 	if (error)
   1924  1.250.6.2  dyoung 		ipstat.ips_cantforward++;
   1925  1.250.6.2  dyoung 	else {
   1926  1.250.6.2  dyoung 		ipstat.ips_forward++;
   1927  1.250.6.2  dyoung 		if (type)
   1928  1.250.6.2  dyoung 			ipstat.ips_redirectsent++;
   1929  1.250.6.2  dyoung 		else {
   1930  1.250.6.2  dyoung 			if (mcopy) {
   1931  1.250.6.2  dyoung #ifdef GATEWAY
   1932  1.250.6.2  dyoung 				if (mcopy->m_flags & M_CANFASTFWD)
   1933  1.250.6.2  dyoung 					ipflow_create(&ipforward_rt, mcopy);
   1934  1.250.6.2  dyoung #endif
   1935  1.250.6.2  dyoung 				m_freem(mcopy);
   1936  1.250.6.2  dyoung 			}
   1937  1.250.6.2  dyoung 			return;
   1938  1.250.6.2  dyoung 		}
   1939  1.250.6.2  dyoung 	}
   1940  1.250.6.2  dyoung 	if (mcopy == NULL)
   1941  1.250.6.2  dyoung 		return;
   1942  1.250.6.2  dyoung 
   1943  1.250.6.2  dyoung 	switch (error) {
   1944  1.250.6.2  dyoung 
   1945  1.250.6.2  dyoung 	case 0:				/* forwarded, but need redirect */
   1946  1.250.6.2  dyoung 		/* type, code set above */
   1947  1.250.6.2  dyoung 		break;
   1948  1.250.6.2  dyoung 
   1949  1.250.6.2  dyoung 	case ENETUNREACH:		/* shouldn't happen, checked above */
   1950  1.250.6.2  dyoung 	case EHOSTUNREACH:
   1951  1.250.6.2  dyoung 	case ENETDOWN:
   1952  1.250.6.2  dyoung 	case EHOSTDOWN:
   1953  1.250.6.2  dyoung 	default:
   1954  1.250.6.2  dyoung 		type = ICMP_UNREACH;
   1955  1.250.6.2  dyoung 		code = ICMP_UNREACH_HOST;
   1956  1.250.6.2  dyoung 		break;
   1957  1.250.6.2  dyoung 
   1958  1.250.6.2  dyoung 	case EMSGSIZE:
   1959  1.250.6.2  dyoung 		type = ICMP_UNREACH;
   1960  1.250.6.2  dyoung 		code = ICMP_UNREACH_NEEDFRAG;
   1961  1.250.6.2  dyoung #if !defined(IPSEC) && !defined(FAST_IPSEC)
   1962  1.250.6.2  dyoung 		if (ipforward_rt.ro_rt != NULL)
   1963  1.250.6.2  dyoung 			destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
   1964  1.250.6.2  dyoung #else
   1965  1.250.6.2  dyoung 		/*
   1966  1.250.6.2  dyoung 		 * If the packet is routed over IPsec tunnel, tell the
   1967  1.250.6.2  dyoung 		 * originator the tunnel MTU.
   1968  1.250.6.2  dyoung 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
   1969  1.250.6.2  dyoung 		 * XXX quickhack!!!
   1970  1.250.6.2  dyoung 		 */
   1971  1.250.6.2  dyoung 		if (ipforward_rt.ro_rt != NULL) {
   1972  1.250.6.2  dyoung 			struct secpolicy *sp;
   1973  1.250.6.2  dyoung 			int ipsecerror;
   1974  1.250.6.2  dyoung 			size_t ipsechdr;
   1975  1.250.6.2  dyoung 			struct route *ro;
   1976  1.250.6.2  dyoung 
   1977  1.250.6.2  dyoung 			sp = ipsec4_getpolicybyaddr(mcopy,
   1978  1.250.6.2  dyoung 			    IPSEC_DIR_OUTBOUND, IP_FORWARDING,
   1979  1.250.6.2  dyoung 			    &ipsecerror);
   1980  1.250.6.2  dyoung 
   1981  1.250.6.2  dyoung 			if (sp == NULL)
   1982  1.250.6.2  dyoung 				destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
   1983  1.250.6.2  dyoung 			else {
   1984  1.250.6.2  dyoung 				/* count IPsec header size */
   1985  1.250.6.2  dyoung 				ipsechdr = ipsec4_hdrsiz(mcopy,
   1986  1.250.6.2  dyoung 				    IPSEC_DIR_OUTBOUND, NULL);
   1987  1.250.6.2  dyoung 
   1988  1.250.6.2  dyoung 				/*
   1989  1.250.6.2  dyoung 				 * find the correct route for outer IPv4
   1990  1.250.6.2  dyoung 				 * header, compute tunnel MTU.
   1991  1.250.6.2  dyoung 				 */
   1992  1.250.6.2  dyoung 
   1993  1.250.6.2  dyoung 				if (sp->req != NULL
   1994  1.250.6.2  dyoung 				 && sp->req->sav != NULL
   1995  1.250.6.2  dyoung 				 && sp->req->sav->sah != NULL) {
   1996  1.250.6.2  dyoung 					ro = &sp->req->sav->sah->sa_route;
   1997  1.250.6.2  dyoung 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
   1998  1.250.6.2  dyoung 						destmtu =
   1999  1.250.6.2  dyoung 						    ro->ro_rt->rt_rmx.rmx_mtu ?
   2000  1.250.6.2  dyoung 						    ro->ro_rt->rt_rmx.rmx_mtu :
   2001  1.250.6.2  dyoung 						    ro->ro_rt->rt_ifp->if_mtu;
   2002  1.250.6.2  dyoung 						destmtu -= ipsechdr;
   2003  1.250.6.2  dyoung 					}
   2004  1.250.6.2  dyoung 				}
   2005  1.250.6.2  dyoung 
   2006  1.250.6.2  dyoung #ifdef	IPSEC
   2007  1.250.6.2  dyoung 				key_freesp(sp);
   2008  1.250.6.2  dyoung #else
   2009  1.250.6.2  dyoung 				KEY_FREESP(&sp);
   2010  1.250.6.2  dyoung #endif
   2011  1.250.6.2  dyoung 			}
   2012  1.250.6.2  dyoung 		}
   2013  1.250.6.2  dyoung #endif /*IPSEC*/
   2014  1.250.6.2  dyoung 		ipstat.ips_cantfrag++;
   2015  1.250.6.2  dyoung 		break;
   2016  1.250.6.2  dyoung 
   2017  1.250.6.2  dyoung 	case ENOBUFS:
   2018  1.250.6.2  dyoung #if 1
   2019  1.250.6.2  dyoung 		/*
   2020  1.250.6.2  dyoung 		 * a router should not generate ICMP_SOURCEQUENCH as
   2021  1.250.6.2  dyoung 		 * required in RFC1812 Requirements for IP Version 4 Routers.
   2022  1.250.6.2  dyoung 		 * source quench could be a big problem under DoS attacks,
   2023  1.250.6.2  dyoung 		 * or if the underlying interface is rate-limited.
   2024  1.250.6.2  dyoung 		 */
   2025  1.250.6.2  dyoung 		if (mcopy)
   2026  1.250.6.2  dyoung 			m_freem(mcopy);
   2027  1.250.6.2  dyoung 		return;
   2028  1.250.6.2  dyoung #else
   2029  1.250.6.2  dyoung 		type = ICMP_SOURCEQUENCH;
   2030  1.250.6.2  dyoung 		code = 0;
   2031  1.250.6.2  dyoung 		break;
   2032  1.250.6.2  dyoung #endif
   2033  1.250.6.2  dyoung 	}
   2034  1.250.6.2  dyoung 	icmp_error(mcopy, type, code, dest, destmtu);
   2035  1.250.6.2  dyoung }
   2036  1.250.6.2  dyoung 
   2037  1.250.6.2  dyoung void
   2038  1.250.6.2  dyoung ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
   2039  1.250.6.2  dyoung     struct mbuf *m)
   2040  1.250.6.2  dyoung {
   2041  1.250.6.2  dyoung 
   2042  1.250.6.2  dyoung 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
   2043  1.250.6.2  dyoung 		struct timeval tv;
   2044  1.250.6.2  dyoung 
   2045  1.250.6.2  dyoung 		microtime(&tv);
   2046  1.250.6.2  dyoung 		*mp = sbcreatecontrol((void *) &tv, sizeof(tv),
   2047  1.250.6.2  dyoung 		    SCM_TIMESTAMP, SOL_SOCKET);
   2048  1.250.6.2  dyoung 		if (*mp)
   2049  1.250.6.2  dyoung 			mp = &(*mp)->m_next;
   2050  1.250.6.2  dyoung 	}
   2051  1.250.6.2  dyoung 	if (inp->inp_flags & INP_RECVDSTADDR) {
   2052  1.250.6.2  dyoung 		*mp = sbcreatecontrol((void *) &ip->ip_dst,
   2053  1.250.6.2  dyoung 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
   2054  1.250.6.2  dyoung 		if (*mp)
   2055  1.250.6.2  dyoung 			mp = &(*mp)->m_next;
   2056  1.250.6.2  dyoung 	}
   2057  1.250.6.2  dyoung #ifdef notyet
   2058  1.250.6.2  dyoung 	/*
   2059  1.250.6.2  dyoung 	 * XXX
   2060  1.250.6.2  dyoung 	 * Moving these out of udp_input() made them even more broken
   2061  1.250.6.2  dyoung 	 * than they already were.
   2062  1.250.6.2  dyoung 	 *	- fenner (at) parc.xerox.com
   2063  1.250.6.2  dyoung 	 */
   2064  1.250.6.2  dyoung 	/* options were tossed already */
   2065  1.250.6.2  dyoung 	if (inp->inp_flags & INP_RECVOPTS) {
   2066  1.250.6.2  dyoung 		*mp = sbcreatecontrol((void *) opts_deleted_above,
   2067  1.250.6.2  dyoung 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
   2068  1.250.6.2  dyoung 		if (*mp)
   2069  1.250.6.2  dyoung 			mp = &(*mp)->m_next;
   2070  1.250.6.2  dyoung 	}
   2071  1.250.6.2  dyoung 	/* ip_srcroute doesn't do what we want here, need to fix */
   2072  1.250.6.2  dyoung 	if (inp->inp_flags & INP_RECVRETOPTS) {
   2073  1.250.6.2  dyoung 		*mp = sbcreatecontrol((void *) ip_srcroute(),
   2074  1.250.6.2  dyoung 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
   2075  1.250.6.2  dyoung 		if (*mp)
   2076  1.250.6.2  dyoung 			mp = &(*mp)->m_next;
   2077  1.250.6.2  dyoung 	}
   2078  1.250.6.2  dyoung #endif
   2079  1.250.6.2  dyoung 	if (inp->inp_flags & INP_RECVIF) {
   2080  1.250.6.2  dyoung 		struct sockaddr_dl sdl;
   2081  1.250.6.2  dyoung 
   2082  1.250.6.2  dyoung 		sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
   2083  1.250.6.2  dyoung 		sdl.sdl_family = AF_LINK;
   2084  1.250.6.2  dyoung 		sdl.sdl_index = m->m_pkthdr.rcvif ?
   2085  1.250.6.2  dyoung 		    m->m_pkthdr.rcvif->if_index : 0;
   2086  1.250.6.2  dyoung 		sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
   2087  1.250.6.2  dyoung 		*mp = sbcreatecontrol((void *) &sdl, sdl.sdl_len,
   2088  1.250.6.2  dyoung 		    IP_RECVIF, IPPROTO_IP);
   2089  1.250.6.2  dyoung 		if (*mp)
   2090  1.250.6.2  dyoung 			mp = &(*mp)->m_next;
   2091  1.250.6.2  dyoung 	}
   2092  1.250.6.2  dyoung }
   2093  1.250.6.2  dyoung 
   2094  1.250.6.2  dyoung /*
   2095  1.250.6.2  dyoung  * sysctl helper routine for net.inet.ip.forwsrcrt.
   2096  1.250.6.2  dyoung  */
   2097  1.250.6.2  dyoung static int
   2098  1.250.6.2  dyoung sysctl_net_inet_ip_forwsrcrt(SYSCTLFN_ARGS)
   2099  1.250.6.2  dyoung {
   2100  1.250.6.2  dyoung 	int error, tmp;
   2101  1.250.6.2  dyoung 	struct sysctlnode node;
   2102  1.250.6.2  dyoung 
   2103  1.250.6.2  dyoung 	node = *rnode;
   2104  1.250.6.2  dyoung 	tmp = ip_forwsrcrt;
   2105  1.250.6.2  dyoung 	node.sysctl_data = &tmp;
   2106  1.250.6.2  dyoung 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2107  1.250.6.2  dyoung 	if (error || newp == NULL)
   2108  1.250.6.2  dyoung 		return (error);
   2109  1.250.6.2  dyoung 
   2110  1.250.6.2  dyoung 	if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_FORWSRCRT,
   2111  1.250.6.2  dyoung 	    0, NULL, NULL, NULL))
   2112  1.250.6.2  dyoung 		return (EPERM);
   2113  1.250.6.2  dyoung 
   2114  1.250.6.2  dyoung 	ip_forwsrcrt = tmp;
   2115  1.250.6.2  dyoung 
   2116  1.250.6.2  dyoung 	return (0);
   2117  1.250.6.2  dyoung }
   2118  1.250.6.2  dyoung 
   2119  1.250.6.2  dyoung /*
   2120  1.250.6.2  dyoung  * sysctl helper routine for net.inet.ip.mtudisctimeout.  checks the
   2121  1.250.6.2  dyoung  * range of the new value and tweaks timers if it changes.
   2122  1.250.6.2  dyoung  */
   2123  1.250.6.2  dyoung static int
   2124  1.250.6.2  dyoung sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS)
   2125  1.250.6.2  dyoung {
   2126  1.250.6.2  dyoung 	int error, tmp;
   2127  1.250.6.2  dyoung 	struct sysctlnode node;
   2128  1.250.6.2  dyoung 
   2129  1.250.6.2  dyoung 	node = *rnode;
   2130  1.250.6.2  dyoung 	tmp = ip_mtudisc_timeout;
   2131  1.250.6.2  dyoung 	node.sysctl_data = &tmp;
   2132  1.250.6.2  dyoung 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2133  1.250.6.2  dyoung 	if (error || newp == NULL)
   2134  1.250.6.2  dyoung 		return (error);
   2135  1.250.6.2  dyoung 	if (tmp < 0)
   2136  1.250.6.2  dyoung 		return (EINVAL);
   2137  1.250.6.2  dyoung 
   2138  1.250.6.2  dyoung 	ip_mtudisc_timeout = tmp;
   2139  1.250.6.2  dyoung 	rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout);
   2140  1.250.6.2  dyoung 
   2141  1.250.6.2  dyoung 	return (0);
   2142  1.250.6.2  dyoung }
   2143  1.250.6.2  dyoung 
   2144  1.250.6.2  dyoung #ifdef GATEWAY
   2145  1.250.6.2  dyoung /*
   2146  1.250.6.2  dyoung  * sysctl helper routine for net.inet.ip.maxflows.
   2147  1.250.6.2  dyoung  */
   2148  1.250.6.2  dyoung static int
   2149  1.250.6.2  dyoung sysctl_net_inet_ip_maxflows(SYSCTLFN_ARGS)
   2150  1.250.6.2  dyoung {
   2151  1.250.6.2  dyoung 	int s;
   2152  1.250.6.2  dyoung 
   2153  1.250.6.2  dyoung 	s = sysctl_lookup(SYSCTLFN_CALL(rnode));
   2154  1.250.6.2  dyoung 	if (s || newp == NULL)
   2155  1.250.6.2  dyoung 		return (s);
   2156  1.250.6.2  dyoung 
   2157  1.250.6.2  dyoung 	s = splsoftnet();
   2158  1.250.6.2  dyoung 	ipflow_reap(0);
   2159  1.250.6.2  dyoung 	splx(s);
   2160  1.250.6.2  dyoung 
   2161  1.250.6.2  dyoung 	return (0);
   2162  1.250.6.2  dyoung }
   2163  1.250.6.2  dyoung 
   2164  1.250.6.2  dyoung static int
   2165  1.250.6.2  dyoung sysctl_net_inet_ip_hashsize(SYSCTLFN_ARGS)
   2166  1.250.6.2  dyoung {
   2167  1.250.6.2  dyoung 	int error, tmp;
   2168  1.250.6.2  dyoung 	struct sysctlnode node;
   2169  1.250.6.2  dyoung 
   2170  1.250.6.2  dyoung 	node = *rnode;
   2171  1.250.6.2  dyoung 	tmp = ip_hashsize;
   2172  1.250.6.2  dyoung 	node.sysctl_data = &tmp;
   2173  1.250.6.2  dyoung 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2174  1.250.6.2  dyoung 	if (error || newp == NULL)
   2175  1.250.6.2  dyoung 		return (error);
   2176  1.250.6.2  dyoung 
   2177  1.250.6.2  dyoung 	if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
   2178  1.250.6.2  dyoung 		/*
   2179  1.250.6.2  dyoung 		 * Can only fail due to malloc()
   2180  1.250.6.2  dyoung 		 */
   2181  1.250.6.2  dyoung 		if (ipflow_invalidate_all(tmp))
   2182  1.250.6.2  dyoung 			return ENOMEM;
   2183  1.250.6.2  dyoung 	} else {
   2184  1.250.6.2  dyoung 		/*
   2185  1.250.6.2  dyoung 		 * EINVAL if not a power of 2
   2186  1.250.6.2  dyoung 	         */
   2187  1.250.6.2  dyoung 		return EINVAL;
   2188  1.250.6.2  dyoung 	}
   2189  1.250.6.2  dyoung 
   2190  1.250.6.2  dyoung 	return (0);
   2191  1.250.6.2  dyoung }
   2192  1.250.6.2  dyoung #endif /* GATEWAY */
   2193  1.250.6.2  dyoung 
   2194  1.250.6.2  dyoung 
   2195  1.250.6.2  dyoung SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup")
   2196  1.250.6.2  dyoung {
   2197  1.250.6.2  dyoung 	extern int subnetsarelocal, hostzeroisbroadcast;
   2198  1.250.6.2  dyoung 
   2199  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2200  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT,
   2201  1.250.6.2  dyoung 		       CTLTYPE_NODE, "net", NULL,
   2202  1.250.6.2  dyoung 		       NULL, 0, NULL, 0,
   2203  1.250.6.2  dyoung 		       CTL_NET, CTL_EOL);
   2204  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2205  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT,
   2206  1.250.6.2  dyoung 		       CTLTYPE_NODE, "inet",
   2207  1.250.6.2  dyoung 		       SYSCTL_DESCR("PF_INET related settings"),
   2208  1.250.6.2  dyoung 		       NULL, 0, NULL, 0,
   2209  1.250.6.2  dyoung 		       CTL_NET, PF_INET, CTL_EOL);
   2210  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2211  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT,
   2212  1.250.6.2  dyoung 		       CTLTYPE_NODE, "ip",
   2213  1.250.6.2  dyoung 		       SYSCTL_DESCR("IPv4 related settings"),
   2214  1.250.6.2  dyoung 		       NULL, 0, NULL, 0,
   2215  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
   2216  1.250.6.2  dyoung 
   2217  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2218  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2219  1.250.6.2  dyoung 		       CTLTYPE_INT, "forwarding",
   2220  1.250.6.2  dyoung 		       SYSCTL_DESCR("Enable forwarding of INET datagrams"),
   2221  1.250.6.2  dyoung 		       NULL, 0, &ipforwarding, 0,
   2222  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2223  1.250.6.2  dyoung 		       IPCTL_FORWARDING, CTL_EOL);
   2224  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2225  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2226  1.250.6.2  dyoung 		       CTLTYPE_INT, "redirect",
   2227  1.250.6.2  dyoung 		       SYSCTL_DESCR("Enable sending of ICMP redirect messages"),
   2228  1.250.6.2  dyoung 		       NULL, 0, &ipsendredirects, 0,
   2229  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2230  1.250.6.2  dyoung 		       IPCTL_SENDREDIRECTS, CTL_EOL);
   2231  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2232  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2233  1.250.6.2  dyoung 		       CTLTYPE_INT, "ttl",
   2234  1.250.6.2  dyoung 		       SYSCTL_DESCR("Default TTL for an INET datagram"),
   2235  1.250.6.2  dyoung 		       NULL, 0, &ip_defttl, 0,
   2236  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2237  1.250.6.2  dyoung 		       IPCTL_DEFTTL, CTL_EOL);
   2238  1.250.6.2  dyoung #ifdef IPCTL_DEFMTU
   2239  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2240  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT /* |CTLFLAG_READWRITE? */,
   2241  1.250.6.2  dyoung 		       CTLTYPE_INT, "mtu",
   2242  1.250.6.2  dyoung 		       SYSCTL_DESCR("Default MTA for an INET route"),
   2243  1.250.6.2  dyoung 		       NULL, 0, &ip_mtu, 0,
   2244  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2245  1.250.6.2  dyoung 		       IPCTL_DEFMTU, CTL_EOL);
   2246  1.250.6.2  dyoung #endif /* IPCTL_DEFMTU */
   2247  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2248  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2249  1.250.6.2  dyoung 		       CTLTYPE_INT, "forwsrcrt",
   2250  1.250.6.2  dyoung 		       SYSCTL_DESCR("Enable forwarding of source-routed "
   2251  1.250.6.2  dyoung 				    "datagrams"),
   2252  1.250.6.2  dyoung 		       sysctl_net_inet_ip_forwsrcrt, 0, &ip_forwsrcrt, 0,
   2253  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2254  1.250.6.2  dyoung 		       IPCTL_FORWSRCRT, CTL_EOL);
   2255  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2256  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2257  1.250.6.2  dyoung 		       CTLTYPE_INT, "directed-broadcast",
   2258  1.250.6.2  dyoung 		       SYSCTL_DESCR("Enable forwarding of broadcast datagrams"),
   2259  1.250.6.2  dyoung 		       NULL, 0, &ip_directedbcast, 0,
   2260  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2261  1.250.6.2  dyoung 		       IPCTL_DIRECTEDBCAST, CTL_EOL);
   2262  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2263  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2264  1.250.6.2  dyoung 		       CTLTYPE_INT, "allowsrcrt",
   2265  1.250.6.2  dyoung 		       SYSCTL_DESCR("Accept source-routed datagrams"),
   2266  1.250.6.2  dyoung 		       NULL, 0, &ip_allowsrcrt, 0,
   2267  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2268  1.250.6.2  dyoung 		       IPCTL_ALLOWSRCRT, CTL_EOL);
   2269  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2270  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2271  1.250.6.2  dyoung 		       CTLTYPE_INT, "subnetsarelocal",
   2272  1.250.6.2  dyoung 		       SYSCTL_DESCR("Whether logical subnets are considered "
   2273  1.250.6.2  dyoung 				    "local"),
   2274  1.250.6.2  dyoung 		       NULL, 0, &subnetsarelocal, 0,
   2275  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2276  1.250.6.2  dyoung 		       IPCTL_SUBNETSARELOCAL, CTL_EOL);
   2277  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2278  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2279  1.250.6.2  dyoung 		       CTLTYPE_INT, "mtudisc",
   2280  1.250.6.2  dyoung 		       SYSCTL_DESCR("Use RFC1191 Path MTU Discovery"),
   2281  1.250.6.2  dyoung 		       NULL, 0, &ip_mtudisc, 0,
   2282  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2283  1.250.6.2  dyoung 		       IPCTL_MTUDISC, CTL_EOL);
   2284  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2285  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2286  1.250.6.2  dyoung 		       CTLTYPE_INT, "anonportmin",
   2287  1.250.6.2  dyoung 		       SYSCTL_DESCR("Lowest ephemeral port number to assign"),
   2288  1.250.6.2  dyoung 		       sysctl_net_inet_ip_ports, 0, &anonportmin, 0,
   2289  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2290  1.250.6.2  dyoung 		       IPCTL_ANONPORTMIN, CTL_EOL);
   2291  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2292  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2293  1.250.6.2  dyoung 		       CTLTYPE_INT, "anonportmax",
   2294  1.250.6.2  dyoung 		       SYSCTL_DESCR("Highest ephemeral port number to assign"),
   2295  1.250.6.2  dyoung 		       sysctl_net_inet_ip_ports, 0, &anonportmax, 0,
   2296  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2297  1.250.6.2  dyoung 		       IPCTL_ANONPORTMAX, CTL_EOL);
   2298  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2299  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2300  1.250.6.2  dyoung 		       CTLTYPE_INT, "mtudisctimeout",
   2301  1.250.6.2  dyoung 		       SYSCTL_DESCR("Lifetime of a Path MTU Discovered route"),
   2302  1.250.6.2  dyoung 		       sysctl_net_inet_ip_pmtudto, 0, &ip_mtudisc_timeout, 0,
   2303  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2304  1.250.6.2  dyoung 		       IPCTL_MTUDISCTIMEOUT, CTL_EOL);
   2305  1.250.6.2  dyoung #ifdef GATEWAY
   2306  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2307  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2308  1.250.6.2  dyoung 		       CTLTYPE_INT, "maxflows",
   2309  1.250.6.2  dyoung 		       SYSCTL_DESCR("Number of flows for fast forwarding"),
   2310  1.250.6.2  dyoung 		       sysctl_net_inet_ip_maxflows, 0, &ip_maxflows, 0,
   2311  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2312  1.250.6.2  dyoung 		       IPCTL_MAXFLOWS, CTL_EOL);
   2313  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2314  1.250.6.2  dyoung 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2315  1.250.6.2  dyoung 			CTLTYPE_INT, "hashsize",
   2316  1.250.6.2  dyoung 			SYSCTL_DESCR("Size of hash table for fast forwarding (IPv4)"),
   2317  1.250.6.2  dyoung 			sysctl_net_inet_ip_hashsize, 0, &ip_hashsize, 0,
   2318  1.250.6.2  dyoung 			CTL_NET, PF_INET, IPPROTO_IP,
   2319  1.250.6.2  dyoung 			CTL_CREATE, CTL_EOL);
   2320  1.250.6.2  dyoung #endif /* GATEWAY */
   2321  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2322  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2323  1.250.6.2  dyoung 		       CTLTYPE_INT, "hostzerobroadcast",
   2324  1.250.6.2  dyoung 		       SYSCTL_DESCR("All zeroes address is broadcast address"),
   2325  1.250.6.2  dyoung 		       NULL, 0, &hostzeroisbroadcast, 0,
   2326  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2327  1.250.6.2  dyoung 		       IPCTL_HOSTZEROBROADCAST, CTL_EOL);
   2328  1.250.6.2  dyoung #if NGIF > 0
   2329  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2330  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2331  1.250.6.2  dyoung 		       CTLTYPE_INT, "gifttl",
   2332  1.250.6.2  dyoung 		       SYSCTL_DESCR("Default TTL for a gif tunnel datagram"),
   2333  1.250.6.2  dyoung 		       NULL, 0, &ip_gif_ttl, 0,
   2334  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2335  1.250.6.2  dyoung 		       IPCTL_GIF_TTL, CTL_EOL);
   2336  1.250.6.2  dyoung #endif /* NGIF */
   2337  1.250.6.2  dyoung #ifndef IPNOPRIVPORTS
   2338  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2339  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2340  1.250.6.2  dyoung 		       CTLTYPE_INT, "lowportmin",
   2341  1.250.6.2  dyoung 		       SYSCTL_DESCR("Lowest privileged ephemeral port number "
   2342  1.250.6.2  dyoung 				    "to assign"),
   2343  1.250.6.2  dyoung 		       sysctl_net_inet_ip_ports, 0, &lowportmin, 0,
   2344  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2345  1.250.6.2  dyoung 		       IPCTL_LOWPORTMIN, CTL_EOL);
   2346  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2347  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2348  1.250.6.2  dyoung 		       CTLTYPE_INT, "lowportmax",
   2349  1.250.6.2  dyoung 		       SYSCTL_DESCR("Highest privileged ephemeral port number "
   2350  1.250.6.2  dyoung 				    "to assign"),
   2351  1.250.6.2  dyoung 		       sysctl_net_inet_ip_ports, 0, &lowportmax, 0,
   2352  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2353  1.250.6.2  dyoung 		       IPCTL_LOWPORTMAX, CTL_EOL);
   2354  1.250.6.2  dyoung #endif /* IPNOPRIVPORTS */
   2355  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2356  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2357  1.250.6.2  dyoung 		       CTLTYPE_INT, "maxfragpackets",
   2358  1.250.6.2  dyoung 		       SYSCTL_DESCR("Maximum number of fragments to retain for "
   2359  1.250.6.2  dyoung 				    "possible reassembly"),
   2360  1.250.6.2  dyoung 		       NULL, 0, &ip_maxfragpackets, 0,
   2361  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2362  1.250.6.2  dyoung 		       IPCTL_MAXFRAGPACKETS, CTL_EOL);
   2363  1.250.6.2  dyoung #if NGRE > 0
   2364  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2365  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2366  1.250.6.2  dyoung 		       CTLTYPE_INT, "grettl",
   2367  1.250.6.2  dyoung 		       SYSCTL_DESCR("Default TTL for a gre tunnel datagram"),
   2368  1.250.6.2  dyoung 		       NULL, 0, &ip_gre_ttl, 0,
   2369  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2370  1.250.6.2  dyoung 		       IPCTL_GRE_TTL, CTL_EOL);
   2371  1.250.6.2  dyoung #endif /* NGRE */
   2372  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2373  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2374  1.250.6.2  dyoung 		       CTLTYPE_INT, "checkinterface",
   2375  1.250.6.2  dyoung 		       SYSCTL_DESCR("Enable receive side of Strong ES model "
   2376  1.250.6.2  dyoung 				    "from RFC1122"),
   2377  1.250.6.2  dyoung 		       NULL, 0, &ip_checkinterface, 0,
   2378  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2379  1.250.6.2  dyoung 		       IPCTL_CHECKINTERFACE, CTL_EOL);
   2380  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2381  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2382  1.250.6.2  dyoung 		       CTLTYPE_INT, "random_id",
   2383  1.250.6.2  dyoung 		       SYSCTL_DESCR("Assign random ip_id values"),
   2384  1.250.6.2  dyoung 		       NULL, 0, &ip_do_randomid, 0,
   2385  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2386  1.250.6.2  dyoung 		       IPCTL_RANDOMID, CTL_EOL);
   2387  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2388  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2389  1.250.6.2  dyoung 		       CTLTYPE_INT, "do_loopback_cksum",
   2390  1.250.6.2  dyoung 		       SYSCTL_DESCR("Perform IP checksum on loopback"),
   2391  1.250.6.2  dyoung 		       NULL, 0, &ip_do_loopback_cksum, 0,
   2392  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP,
   2393  1.250.6.2  dyoung 		       IPCTL_LOOPBACKCKSUM, CTL_EOL);
   2394  1.250.6.2  dyoung 	sysctl_createv(clog, 0, NULL, NULL,
   2395  1.250.6.2  dyoung 		       CTLFLAG_PERMANENT,
   2396  1.250.6.2  dyoung 		       CTLTYPE_STRUCT, "stats",
   2397  1.250.6.2  dyoung 		       SYSCTL_DESCR("IP statistics"),
   2398  1.250.6.2  dyoung 		       NULL, 0, &ipstat, sizeof(ipstat),
   2399  1.250.6.2  dyoung 		       CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS,
   2400  1.250.6.2  dyoung 		       CTL_EOL);
   2401  1.250.6.2  dyoung }
   2402