Home | History | Annotate | Line # | Download | only in netinet
ip_input.c revision 1.248
      1  1.248  liamjfoy /*	$NetBSD: ip_input.c,v 1.248 2007/03/25 20:12:20 liamjfoy Exp $	*/
      2   1.89    itojun 
      3   1.89    itojun /*
      4   1.89    itojun  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5   1.89    itojun  * All rights reserved.
      6  1.152    itojun  *
      7   1.89    itojun  * Redistribution and use in source and binary forms, with or without
      8   1.89    itojun  * modification, are permitted provided that the following conditions
      9   1.89    itojun  * are met:
     10   1.89    itojun  * 1. Redistributions of source code must retain the above copyright
     11   1.89    itojun  *    notice, this list of conditions and the following disclaimer.
     12   1.89    itojun  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.89    itojun  *    notice, this list of conditions and the following disclaimer in the
     14   1.89    itojun  *    documentation and/or other materials provided with the distribution.
     15   1.89    itojun  * 3. Neither the name of the project nor the names of its contributors
     16   1.89    itojun  *    may be used to endorse or promote products derived from this software
     17   1.89    itojun  *    without specific prior written permission.
     18  1.152    itojun  *
     19   1.89    itojun  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20   1.89    itojun  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21   1.89    itojun  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22   1.89    itojun  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23   1.89    itojun  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24   1.89    itojun  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25   1.89    itojun  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26   1.89    itojun  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27   1.89    itojun  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28   1.89    itojun  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29   1.89    itojun  * SUCH DAMAGE.
     30   1.89    itojun  */
     31   1.76   thorpej 
     32   1.76   thorpej /*-
     33   1.76   thorpej  * Copyright (c) 1998 The NetBSD Foundation, Inc.
     34   1.76   thorpej  * All rights reserved.
     35   1.76   thorpej  *
     36   1.76   thorpej  * This code is derived from software contributed to The NetBSD Foundation
     37   1.76   thorpej  * by Public Access Networks Corporation ("Panix").  It was developed under
     38   1.76   thorpej  * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
     39   1.76   thorpej  *
     40   1.76   thorpej  * Redistribution and use in source and binary forms, with or without
     41   1.76   thorpej  * modification, are permitted provided that the following conditions
     42   1.76   thorpej  * are met:
     43   1.76   thorpej  * 1. Redistributions of source code must retain the above copyright
     44   1.76   thorpej  *    notice, this list of conditions and the following disclaimer.
     45   1.76   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     46   1.76   thorpej  *    notice, this list of conditions and the following disclaimer in the
     47   1.76   thorpej  *    documentation and/or other materials provided with the distribution.
     48   1.76   thorpej  * 3. All advertising materials mentioning features or use of this software
     49   1.76   thorpej  *    must display the following acknowledgement:
     50   1.76   thorpej  *	This product includes software developed by the NetBSD
     51   1.76   thorpej  *	Foundation, Inc. and its contributors.
     52   1.76   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     53   1.76   thorpej  *    contributors may be used to endorse or promote products derived
     54   1.76   thorpej  *    from this software without specific prior written permission.
     55   1.76   thorpej  *
     56   1.76   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     57   1.76   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     58   1.76   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     59   1.76   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     60   1.76   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     61   1.76   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     62   1.76   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     63   1.76   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     64   1.76   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     65   1.76   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     66   1.76   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     67   1.76   thorpej  */
     68   1.14       cgd 
     69    1.1       cgd /*
     70   1.13   mycroft  * Copyright (c) 1982, 1986, 1988, 1993
     71   1.13   mycroft  *	The Regents of the University of California.  All rights reserved.
     72    1.1       cgd  *
     73    1.1       cgd  * Redistribution and use in source and binary forms, with or without
     74    1.1       cgd  * modification, are permitted provided that the following conditions
     75    1.1       cgd  * are met:
     76    1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     77    1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     78    1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     79    1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     80    1.1       cgd  *    documentation and/or other materials provided with the distribution.
     81  1.172       agc  * 3. Neither the name of the University nor the names of its contributors
     82    1.1       cgd  *    may be used to endorse or promote products derived from this software
     83    1.1       cgd  *    without specific prior written permission.
     84    1.1       cgd  *
     85    1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     86    1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     87    1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     88    1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     89    1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     90    1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     91    1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     92    1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     93    1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     94    1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     95    1.1       cgd  * SUCH DAMAGE.
     96    1.1       cgd  *
     97   1.14       cgd  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
     98    1.1       cgd  */
     99  1.141     lukem 
    100  1.141     lukem #include <sys/cdefs.h>
    101  1.248  liamjfoy __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.248 2007/03/25 20:12:20 liamjfoy Exp $");
    102   1.55    scottr 
    103  1.184  jonathan #include "opt_inet.h"
    104   1.62      matt #include "opt_gateway.h"
    105   1.69       mrg #include "opt_pfil_hooks.h"
    106   1.91   thorpej #include "opt_ipsec.h"
    107   1.55    scottr #include "opt_mrouting.h"
    108  1.167    martin #include "opt_mbuftrace.h"
    109  1.135   thorpej #include "opt_inet_csum.h"
    110    1.1       cgd 
    111    1.5   mycroft #include <sys/param.h>
    112    1.5   mycroft #include <sys/systm.h>
    113    1.5   mycroft #include <sys/malloc.h>
    114    1.5   mycroft #include <sys/mbuf.h>
    115    1.5   mycroft #include <sys/domain.h>
    116    1.5   mycroft #include <sys/protosw.h>
    117    1.5   mycroft #include <sys/socket.h>
    118   1.44   thorpej #include <sys/socketvar.h>
    119    1.5   mycroft #include <sys/errno.h>
    120    1.5   mycroft #include <sys/time.h>
    121    1.5   mycroft #include <sys/kernel.h>
    122   1.72   thorpej #include <sys/pool.h>
    123   1.28  christos #include <sys/sysctl.h>
    124  1.230      elad #include <sys/kauth.h>
    125    1.1       cgd 
    126    1.5   mycroft #include <net/if.h>
    127   1.44   thorpej #include <net/if_dl.h>
    128    1.5   mycroft #include <net/route.h>
    129   1.45       mrg #include <net/pfil.h>
    130    1.1       cgd 
    131    1.5   mycroft #include <netinet/in.h>
    132    1.5   mycroft #include <netinet/in_systm.h>
    133    1.5   mycroft #include <netinet/ip.h>
    134    1.5   mycroft #include <netinet/in_pcb.h>
    135  1.215      yamt #include <netinet/in_proto.h>
    136    1.5   mycroft #include <netinet/in_var.h>
    137    1.5   mycroft #include <netinet/ip_var.h>
    138    1.5   mycroft #include <netinet/ip_icmp.h>
    139   1.89    itojun /* just for gif_ttl */
    140   1.89    itojun #include <netinet/in_gif.h>
    141   1.89    itojun #include "gif.h"
    142  1.144    martin #include <net/if_gre.h>
    143  1.144    martin #include "gre.h"
    144  1.111  jdolecek 
    145  1.111  jdolecek #ifdef MROUTING
    146  1.111  jdolecek #include <netinet/ip_mroute.h>
    147  1.111  jdolecek #endif
    148   1.89    itojun 
    149   1.89    itojun #ifdef IPSEC
    150   1.89    itojun #include <netinet6/ipsec.h>
    151   1.89    itojun #include <netkey/key.h>
    152   1.89    itojun #endif
    153  1.173  jonathan #ifdef FAST_IPSEC
    154  1.173  jonathan #include <netipsec/ipsec.h>
    155  1.173  jonathan #include <netipsec/key.h>
    156  1.173  jonathan #endif	/* FAST_IPSEC*/
    157   1.44   thorpej 
/*
 * Compile-time defaults for the run-time variables defined at the end of
 * this block.  Every default is wrapped in #ifndef, so a definition that
 * is already in effect when this point is reached takes precedence.
 */
#ifndef	IPFORWARDING
#ifdef GATEWAY
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else /* GATEWAY */
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif /* GATEWAY */
#endif /* IPFORWARDING */
#ifndef	IPSENDREDIRECTS
#define	IPSENDREDIRECTS	1
#endif
#ifndef IPFORWSRCRT
#define	IPFORWSRCRT	1	/* forward source-routed packets */
#endif
#ifndef IPALLOWSRCRT
#define	IPALLOWSRCRT	1	/* allow source-routed packets */
#endif
#ifndef IPMTUDISC
#define IPMTUDISC	1
#endif
#ifndef IPMTUDISCTIMEOUT
/* NOTE(review): 10 * 60 is presumably a value in seconds -- confirm. */
#define IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
#endif

/*
 * Note: DIRECTED_BROADCAST is handled this way so that previous
 * configuration using this option will Just Work.
 */
#ifndef IPDIRECTEDBCAST
#ifdef DIRECTED_BROADCAST
#define IPDIRECTEDBCAST	1
#else
#define	IPDIRECTEDBCAST	0
#endif /* DIRECTED_BROADCAST */
#endif /* IPDIRECTEDBCAST */
/*
 * Run-time variables, each initialized from the matching default above
 * (ip_defttl takes IPDEFTTL, which is not defined in this file).
 */
int	ipforwarding = IPFORWARDING;
int	ipsendredirects = IPSENDREDIRECTS;
int	ip_defttl = IPDEFTTL;
int	ip_forwsrcrt = IPFORWSRCRT;
int	ip_directedbcast = IPDIRECTEDBCAST;
int	ip_allowsrcrt = IPALLOWSRCRT;
int	ip_mtudisc = IPMTUDISC;
/* Passed to rt_timer_queue_create() by ip_init(). */
int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
#ifdef DIAGNOSTIC
/* Only present in DIAGNOSTIC kernels; starts out disabled. */
int	ipprintfs = 0;
#endif
    203  1.184  jonathan 
/* NOTE(review): presumably selects randomized IP IDs -- confirm at the users. */
int	ip_do_randomid = 0;

/*
 * XXX - Setting ip_checkinterface mostly implements the receive side of
 * the Strong ES model described in RFC 1122, but since the routing table
 * and transmit implementation do not implement the Strong ES model,
 * setting this to 1 results in an odd hybrid.
 *
 * XXX - ip_checkinterface currently must be disabled if you use ipnat
 * to translate the destination address to another local interface.
 *
 * XXX - ip_checkinterface must be disabled if you add IP aliases
 * to the loopback interface instead of the interface where the
 * packets for those addresses are received.
 */
int	ip_checkinterface = 0;


/* Timer queue; created in ip_init() from ip_mtudisc_timeout. */
struct rttimer_queue *ip_mtudisc_timeout_q = NULL;

/* Copied into ipintrq.ifq_maxlen by ip_init(). */
int	ipqmaxlen = IFQ_MAXLEN;
u_long	in_ifaddrhash;				/* size of hash table - 1 */
int	in_ifaddrentries;			/* total number of addrs */
/* Tail queue head; initialized by ip_init() and tested for emptiness in ip_input(). */
struct in_ifaddrhead in_ifaddrhead;
/* Hash table allocated with hashinit() in ip_init(). */
struct	in_ifaddrhashhead *in_ifaddrhashtbl;
u_long	in_multihash;				/* size of hash table - 1 */
int	in_multientries;			/* total number of addrs */
/* Hash table allocated with hashinit() in ip_init(). */
struct	in_multihashhead *in_multihashtbl;
/* IP input queue; drained by ipintr(). */
struct	ifqueue ipintrq;
/* IP statistics counters (the ips_* fields bumped in ip_input()). */
struct	ipstat	ipstat;
/* Seeded from time_second in ip_init(). */
uint16_t ip_id;

#ifdef PFIL_HOOKS
/* Packet filter hook head; registered for AF_INET in ip_init(). */
struct pfil_head inet_pfil_hook;
#endif
    239  1.121   thorpej 
/*
 * Cached copy of nmbclusters. If nmbclusters is different,
 * recalculate IP parameters derived from nmbclusters.
 */
static int	ip_nmbclusters;			/* copy of nmbclusters */
static void	ip_nmbclusters_changed(void);	/* recalc limits */

/*
 * Re-derive the nmbclusters-based limits when nmbclusters no longer
 * matches the cached copy.  The mismatch is hinted as the unlikely case.
 */
#define CHECK_NMBCLUSTER_PARAMS()				\
do {								\
	if (__predict_false(ip_nmbclusters != nmbclusters))	\
		ip_nmbclusters_changed();			\
} while (/*CONSTCOND*/0)

/* IP datagram reassembly queues (hashed) */
#define IPREASS_NHASH_LOG2      6
#define IPREASS_NHASH           (1 << IPREASS_NHASH_LOG2)
#define IPREASS_HMASK           (IPREASS_NHASH - 1)
/*
 * Bucket index: bits 0-3 and bits 8-11 of x are packed into one byte,
 * XORed with y, and masked down to the table size.
 */
#define IPREASS_HASH(x,y) \
	(((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
/* Reassembly hash buckets; each list head is initialized in ip_init(). */
struct ipqhead ipq[IPREASS_NHASH];
/* Nonzero while the ipq lock is held; see ipq_lock_try()/ipq_unlock(). */
int	ipq_locked;
static int	ip_nfragpackets;	/* packets in reass queue */
static int	ip_nfrags;		/* total fragments in reass queues */

int	ip_maxfragpackets = 200;	/* limit on packets. XXX sysctl */
/* Set to nmbclusters / 4 by ip_nmbclusters_changed(). */
int	ip_maxfrags;		        /* limit on fragments. XXX sysctl */
    266  1.194  jonathan 
    267  1.194  jonathan 
/*
 * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for
 * IP reassembly queue buffer management.
 *
 * We keep a count of total IP fragments (NB: not fragmented packets!)
 * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
 * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the
 * total fragments in the reassembly queues.  This AIMD policy avoids
 * repeatedly deleting single packets under heavy fragmentation load
 * (e.g., from lossy NFS peers).
 */
static u_int	ip_reass_ttl_decr(u_int ticks);
static void	ip_reass_drophalf(void);


/* Forward declarations for the ipq lock helpers defined just below. */
static inline int ipq_lock_try(void);
static inline void ipq_unlock(void);
    285   1.75   thorpej 
    286  1.223     perry static inline int
    287  1.211     perry ipq_lock_try(void)
    288   1.75   thorpej {
    289   1.75   thorpej 	int s;
    290   1.75   thorpej 
    291  1.132   thorpej 	/*
    292  1.149       wiz 	 * Use splvm() -- we're blocking things that would cause
    293  1.132   thorpej 	 * mbuf allocation.
    294  1.132   thorpej 	 */
    295  1.132   thorpej 	s = splvm();
    296   1.75   thorpej 	if (ipq_locked) {
    297   1.75   thorpej 		splx(s);
    298   1.75   thorpej 		return (0);
    299   1.75   thorpej 	}
    300   1.75   thorpej 	ipq_locked = 1;
    301   1.75   thorpej 	splx(s);
    302   1.75   thorpej 	return (1);
    303   1.75   thorpej }
    304   1.75   thorpej 
    305  1.223     perry static inline void
    306  1.211     perry ipq_unlock(void)
    307   1.75   thorpej {
    308   1.75   thorpej 	int s;
    309   1.75   thorpej 
    310  1.132   thorpej 	s = splvm();
    311   1.75   thorpej 	ipq_locked = 0;
    312   1.75   thorpej 	splx(s);
    313   1.75   thorpej }
    314   1.75   thorpej 
/*
 * ipq lock wrappers.
 *
 * Without DIAGNOSTIC, IPQ_LOCK() discards the result of ipq_lock_try()
 * and IPQ_LOCK_CHECK() expands to nothing.  With DIAGNOSTIC, both report
 * the call site and panic: IPQ_LOCK() when the lock is already held,
 * IPQ_LOCK_CHECK() when the lock is not held.
 */
#ifndef DIAGNOSTIC
#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */
#else
#define	IPQ_LOCK()							\
do {									\
	if (!ipq_lock_try()) {						\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)
#define	IPQ_LOCK_CHECK()						\
do {									\
	if (!ipq_locked) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)
#endif

#define	IPQ_UNLOCK()		ipq_unlock()
    336    1.1       cgd 
/*
 * Pools sized for struct in_multi ("inmltpl", IPL_SOFTNET) and
 * struct ipqent ("ipqepl", IPL_VM) items.
 */
POOL_INIT(inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", NULL,
    IPL_SOFTNET);
POOL_INIT(ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", NULL,
    IPL_VM);
    341   1.72   thorpej 
/*
 * Optional event counters for IPv4 header checksum handling.
 * ip_input() bumps ip_hwcsum_bad / ip_hwcsum_ok according to the
 * checksum flags already set on the mbuf, and ip_swcsum when it calls
 * in_cksum() itself.  Without INET_CSUM_COUNTERS the increment macro
 * expands to nothing.
 */
#ifdef INET_CSUM_COUNTERS
#include <sys/device.h>

struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "hwcsum bad");
struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "hwcsum ok");
struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "inet", "swcsum");

/* ev is a pointer to a struct evcnt; its ev_count is post-incremented. */
#define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++

EVCNT_ATTACH_STATIC(ip_hwcsum_bad);
EVCNT_ATTACH_STATIC(ip_hwcsum_ok);
EVCNT_ATTACH_STATIC(ip_swcsum);

#else

#define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */

#endif /* INET_CSUM_COUNTERS */
    363  1.135   thorpej 
/*
 * We need to save the IP options in case a protocol wants to respond
 * to an incoming packet over the same route if the packet got here
 * using IP source routing.  This allows connection establishment and
 * maintenance when the remote end is on a network that is not known
 * to us.
 */
/* NOTE(review): presumably the number of hops saved in ip_srcrt -- confirm. */
int	ip_nhops = 0;
/* Single file-scope save area for the options described above. */
static	struct ip_srcrt {
	struct	in_addr dst;			/* final destination */
	char	nop;				/* one NOP to align */
	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
	/* Room for as many addresses as fit in MAX_IPOPTLEN bytes. */
	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
} ip_srcrt;

/* Defined outside the portion of the file shown here. */
static void save_rte(u_char *, struct in_addr);
    380   1.35   mycroft 
#ifdef MBUFTRACE
/*
 * mbuf owner records, attached in ip_init().  ip_rx_mowner is the owner
 * passed to MCLAIM() in ipintr() and ip_input().
 * NOTE(review): ip_tx_mowner is presumably the transmit-side counterpart;
 * its users are not visible here.
 */
struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx");
struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx");
#endif
    385  1.164      matt 
    386    1.1       cgd /*
    387  1.194  jonathan  * Compute IP limits derived from the value of nmbclusters.
    388  1.194  jonathan  */
    389  1.194  jonathan static void
    390  1.194  jonathan ip_nmbclusters_changed(void)
    391  1.194  jonathan {
    392  1.194  jonathan 	ip_maxfrags = nmbclusters / 4;
    393  1.194  jonathan 	ip_nmbclusters =  nmbclusters;
    394  1.194  jonathan }
    395  1.194  jonathan 
    396  1.194  jonathan /*
    397    1.1       cgd  * IP initialization: fill in IP protocol switch table.
    398    1.1       cgd  * All protocols not implemented in kernel go to raw IP protocol handler.
    399    1.1       cgd  */
    400    1.8   mycroft void
    401  1.211     perry ip_init(void)
    402    1.1       cgd {
    403  1.199      matt 	const struct protosw *pr;
    404  1.109  augustss 	int i;
    405    1.1       cgd 
    406    1.1       cgd 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
    407    1.1       cgd 	if (pr == 0)
    408    1.1       cgd 		panic("ip_init");
    409    1.1       cgd 	for (i = 0; i < IPPROTO_MAX; i++)
    410    1.1       cgd 		ip_protox[i] = pr - inetsw;
    411    1.1       cgd 	for (pr = inetdomain.dom_protosw;
    412    1.1       cgd 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
    413    1.1       cgd 		if (pr->pr_domain->dom_family == PF_INET &&
    414    1.1       cgd 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
    415    1.1       cgd 			ip_protox[pr->pr_protocol] = pr - inetsw;
    416  1.192  jonathan 
    417  1.190  jonathan 	for (i = 0; i < IPREASS_NHASH; i++)
    418  1.190  jonathan 	    	LIST_INIT(&ipq[i]);
    419  1.190  jonathan 
    420  1.227    kardel 	ip_id = time_second & 0xfffff;
    421  1.194  jonathan 
    422    1.1       cgd 	ipintrq.ifq_maxlen = ipqmaxlen;
    423  1.194  jonathan 	ip_nmbclusters_changed();
    424  1.194  jonathan 
    425  1.181  jonathan 	TAILQ_INIT(&in_ifaddrhead);
    426  1.120        ad 	in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
    427  1.120        ad 	    M_WAITOK, &in_ifaddrhash);
    428  1.166      matt 	in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
    429  1.166      matt 	    M_WAITOK, &in_multihash);
    430  1.160    itojun 	ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
    431   1.73   thorpej #ifdef GATEWAY
    432  1.248  liamjfoy 	ipflow_init(ip_hashsize);
    433   1.73   thorpej #endif
    434  1.121   thorpej 
    435  1.121   thorpej #ifdef PFIL_HOOKS
    436  1.121   thorpej 	/* Register our Packet Filter hook. */
    437  1.126   thorpej 	inet_pfil_hook.ph_type = PFIL_TYPE_AF;
    438  1.126   thorpej 	inet_pfil_hook.ph_af   = AF_INET;
    439  1.121   thorpej 	i = pfil_head_register(&inet_pfil_hook);
    440  1.121   thorpej 	if (i != 0)
    441  1.121   thorpej 		printf("ip_init: WARNING: unable to register pfil hook, "
    442  1.121   thorpej 		    "error %d\n", i);
    443  1.121   thorpej #endif /* PFIL_HOOKS */
    444  1.135   thorpej 
    445  1.164      matt #ifdef MBUFTRACE
    446  1.164      matt 	MOWNER_ATTACH(&ip_tx_mowner);
    447  1.164      matt 	MOWNER_ATTACH(&ip_rx_mowner);
    448  1.164      matt #endif /* MBUFTRACE */
    449    1.1       cgd }
    450    1.1       cgd 
/*
 * AF_INET socket address with its length and family pre-set; the members
 * not named in the initializer start out zeroed.
 */
struct	sockaddr_in ipaddr = {
	.sin_len = sizeof(ipaddr),
	.sin_family = AF_INET,
};
/* NOTE(review): presumably the route cached for forwarding -- confirm at the users. */
struct	route ipforward_rt;
    456    1.1       cgd 
    457    1.1       cgd /*
    458   1.89    itojun  * IP software interrupt routine
    459   1.89    itojun  */
    460   1.89    itojun void
    461  1.211     perry ipintr(void)
    462   1.89    itojun {
    463   1.89    itojun 	int s;
    464   1.89    itojun 	struct mbuf *m;
    465   1.89    itojun 
    466  1.241        ad 	while (!IF_IS_EMPTY(&ipintrq)) {
    467  1.132   thorpej 		s = splnet();
    468   1.89    itojun 		IF_DEQUEUE(&ipintrq, m);
    469   1.89    itojun 		splx(s);
    470   1.89    itojun 		if (m == 0)
    471   1.89    itojun 			return;
    472  1.164      matt 		MCLAIM(m, &ip_rx_mowner);
    473   1.89    itojun 		ip_input(m);
    474   1.89    itojun 	}
    475   1.89    itojun }
    476   1.89    itojun 
    477   1.89    itojun /*
    478    1.1       cgd  * Ip input routine.  Checksum and byte swap header.  If fragmented
    479    1.1       cgd  * try to reassemble.  Process options.  Pass to next level.
    480    1.1       cgd  */
    481    1.8   mycroft void
    482   1.89    itojun ip_input(struct mbuf *m)
    483    1.1       cgd {
    484  1.109  augustss 	struct ip *ip = NULL;
    485  1.109  augustss 	struct ipq *fp;
    486  1.109  augustss 	struct in_ifaddr *ia;
    487  1.109  augustss 	struct ifaddr *ifa;
    488   1.25       cgd 	struct ipqent *ipqe;
    489   1.89    itojun 	int hlen = 0, mff, len;
    490  1.100    itojun 	int downmatch;
    491  1.165  christos 	int checkif;
    492  1.169    itojun 	int srcrt = 0;
    493  1.233       tls 	int s;
    494  1.190  jonathan 	u_int hash;
    495  1.173  jonathan #ifdef FAST_IPSEC
    496  1.173  jonathan 	struct m_tag *mtag;
    497  1.173  jonathan 	struct tdb_ident *tdbi;
    498  1.173  jonathan 	struct secpolicy *sp;
    499  1.233       tls 	int error;
    500  1.173  jonathan #endif /* FAST_IPSEC */
    501    1.1       cgd 
    502  1.164      matt 	MCLAIM(m, &ip_rx_mowner);
    503    1.1       cgd #ifdef	DIAGNOSTIC
    504    1.1       cgd 	if ((m->m_flags & M_PKTHDR) == 0)
    505    1.1       cgd 		panic("ipintr no HDR");
    506   1.89    itojun #endif
    507  1.164      matt 
    508    1.1       cgd 	/*
    509    1.1       cgd 	 * If no IP addresses have been set yet but the interfaces
    510    1.1       cgd 	 * are receiving, can't do anything with incoming packets yet.
    511    1.1       cgd 	 */
    512  1.181  jonathan 	if (TAILQ_FIRST(&in_ifaddrhead) == 0)
    513    1.1       cgd 		goto bad;
    514    1.1       cgd 	ipstat.ips_total++;
    515  1.154   thorpej 	/*
    516  1.154   thorpej 	 * If the IP header is not aligned, slurp it up into a new
    517  1.154   thorpej 	 * mbuf with space for link headers, in the event we forward
    518  1.154   thorpej 	 * it.  Otherwise, if it is aligned, make sure the entire
    519  1.154   thorpej 	 * base IP header is in the first mbuf of the chain.
    520  1.154   thorpej 	 */
    521  1.244  christos 	if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
    522  1.154   thorpej 		if ((m = m_copyup(m, sizeof(struct ip),
    523  1.154   thorpej 				  (max_linkhdr + 3) & ~3)) == NULL) {
    524  1.154   thorpej 			/* XXXJRT new stat, please */
    525  1.154   thorpej 			ipstat.ips_toosmall++;
    526  1.154   thorpej 			return;
    527  1.154   thorpej 		}
    528  1.154   thorpej 	} else if (__predict_false(m->m_len < sizeof (struct ip))) {
    529  1.154   thorpej 		if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
    530  1.154   thorpej 			ipstat.ips_toosmall++;
    531  1.154   thorpej 			return;
    532  1.154   thorpej 		}
    533    1.1       cgd 	}
    534    1.1       cgd 	ip = mtod(m, struct ip *);
    535   1.13   mycroft 	if (ip->ip_v != IPVERSION) {
    536   1.13   mycroft 		ipstat.ips_badvers++;
    537   1.13   mycroft 		goto bad;
    538   1.13   mycroft 	}
    539    1.1       cgd 	hlen = ip->ip_hl << 2;
    540    1.1       cgd 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
    541    1.1       cgd 		ipstat.ips_badhlen++;
    542    1.1       cgd 		goto bad;
    543    1.1       cgd 	}
    544    1.1       cgd 	if (hlen > m->m_len) {
    545    1.1       cgd 		if ((m = m_pullup(m, hlen)) == 0) {
    546    1.1       cgd 			ipstat.ips_badhlen++;
    547   1.89    itojun 			return;
    548    1.1       cgd 		}
    549    1.1       cgd 		ip = mtod(m, struct ip *);
    550    1.1       cgd 	}
    551   1.98   thorpej 
    552   1.85       hwr 	/*
    553   1.99   thorpej 	 * RFC1122: packets with a multicast source address are
    554   1.98   thorpej 	 * not allowed.
    555   1.85       hwr 	 */
    556   1.85       hwr 	if (IN_MULTICAST(ip->ip_src.s_addr)) {
    557  1.130    itojun 		ipstat.ips_badaddr++;
    558   1.85       hwr 		goto bad;
    559  1.129    itojun 	}
    560  1.129    itojun 
    561  1.129    itojun 	/* 127/8 must not appear on wire - RFC1122 */
    562  1.129    itojun 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
    563  1.129    itojun 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
    564  1.130    itojun 		if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
    565  1.130    itojun 			ipstat.ips_badaddr++;
    566  1.129    itojun 			goto bad;
    567  1.130    itojun 		}
    568   1.85       hwr 	}
    569   1.85       hwr 
    570  1.135   thorpej 	switch (m->m_pkthdr.csum_flags &
    571  1.137   thorpej 		((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
    572  1.135   thorpej 		 M_CSUM_IPv4_BAD)) {
    573  1.135   thorpej 	case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
    574  1.135   thorpej 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
    575  1.135   thorpej 		goto badcsum;
    576  1.135   thorpej 
    577  1.135   thorpej 	case M_CSUM_IPv4:
    578  1.135   thorpej 		/* Checksum was okay. */
    579  1.135   thorpej 		INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
    580  1.135   thorpej 		break;
    581  1.135   thorpej 
    582  1.135   thorpej 	default:
    583  1.206   thorpej 		/*
    584  1.206   thorpej 		 * Must compute it ourselves.  Maybe skip checksum on
    585  1.206   thorpej 		 * loopback interfaces.
    586  1.206   thorpej 		 */
    587  1.206   thorpej 		if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
    588  1.206   thorpej 				     IFF_LOOPBACK) || ip_do_loopback_cksum)) {
    589  1.206   thorpej 			INET_CSUM_COUNTER_INCR(&ip_swcsum);
    590  1.206   thorpej 			if (in_cksum(m, hlen) != 0)
    591  1.206   thorpej 				goto badcsum;
    592  1.206   thorpej 		}
    593  1.135   thorpej 		break;
    594    1.1       cgd 	}
    595    1.1       cgd 
    596  1.121   thorpej 	/* Retrieve the packet length. */
    597  1.121   thorpej 	len = ntohs(ip->ip_len);
    598   1.81     proff 
    599   1.81     proff 	/*
    600   1.81     proff 	 * Check for additional length bogosity
    601   1.81     proff 	 */
    602   1.84     proff 	if (len < hlen) {
    603   1.81     proff 	 	ipstat.ips_badlen++;
    604   1.81     proff 		goto bad;
    605   1.81     proff 	}
    606    1.1       cgd 
    607    1.1       cgd 	/*
    608    1.1       cgd 	 * Check that the amount of data in the buffers
    609    1.1       cgd 	 * is at least as much as the IP header would have us expect.
    610    1.1       cgd 	 * Trim mbufs if longer than we expect.
    611    1.1       cgd 	 * Drop packet if shorter than we expect.
    612    1.1       cgd 	 */
    613   1.35   mycroft 	if (m->m_pkthdr.len < len) {
    614    1.1       cgd 		ipstat.ips_tooshort++;
    615    1.1       cgd 		goto bad;
    616    1.1       cgd 	}
    617   1.35   mycroft 	if (m->m_pkthdr.len > len) {
    618    1.1       cgd 		if (m->m_len == m->m_pkthdr.len) {
    619   1.35   mycroft 			m->m_len = len;
    620   1.35   mycroft 			m->m_pkthdr.len = len;
    621    1.1       cgd 		} else
    622   1.35   mycroft 			m_adj(m, len - m->m_pkthdr.len);
    623    1.1       cgd 	}
    624    1.1       cgd 
    625  1.193       scw #if defined(IPSEC)
    626  1.149       wiz 	/* ipflow (IP fast forwarding) is not compatible with IPsec. */
    627   1.94    itojun 	m->m_flags &= ~M_CANFASTFWD;
    628   1.94    itojun #else
    629   1.64   thorpej 	/*
    630   1.64   thorpej 	 * Assume that we can create a fast-forward IP flow entry
    631   1.64   thorpej 	 * based on this packet.
    632   1.64   thorpej 	 */
    633   1.64   thorpej 	m->m_flags |= M_CANFASTFWD;
    634   1.94    itojun #endif
    635   1.64   thorpej 
    636   1.36       mrg #ifdef PFIL_HOOKS
    637   1.33       mrg 	/*
    638   1.64   thorpej 	 * Run through list of hooks for input packets.  If there are any
    639   1.64   thorpej 	 * filters which require that additional packets in the flow are
    640   1.64   thorpej 	 * not fast-forwarded, they must clear the M_CANFASTFWD flag.
    641   1.64   thorpej 	 * Note that filters must _never_ set this flag, as another filter
    642   1.64   thorpej 	 * in the list may have previously cleared it.
    643   1.33       mrg 	 */
    644  1.127    itojun 	/*
    645  1.127    itojun 	 * let ipfilter look at packet on the wire,
    646  1.127    itojun 	 * not the decapsulated packet.
    647  1.127    itojun 	 */
    648  1.127    itojun #ifdef IPSEC
    649  1.136    itojun 	if (!ipsec_getnhist(m))
    650  1.186       scw #elif defined(FAST_IPSEC)
    651  1.186       scw 	if (!ipsec_indone(m))
    652  1.127    itojun #else
    653  1.127    itojun 	if (1)
    654  1.127    itojun #endif
    655  1.127    itojun 	{
    656  1.169    itojun 		struct in_addr odst;
    657  1.169    itojun 
    658  1.169    itojun 		odst = ip->ip_dst;
    659  1.127    itojun 		if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
    660  1.168    itojun 		    PFIL_IN) != 0)
    661  1.168    itojun 			return;
    662  1.127    itojun 		if (m == NULL)
    663  1.127    itojun 			return;
    664  1.127    itojun 		ip = mtod(m, struct ip *);
    665  1.142   darrenr 		hlen = ip->ip_hl << 2;
    666  1.205   darrenr 		/*
    667  1.205   darrenr 		 * XXX The setting of "srcrt" here is to prevent ip_forward()
    668  1.205   darrenr 		 * from generating ICMP redirects for packets that have
    669  1.205   darrenr 		 * been redirected by a hook back out on to the same LAN that
    670  1.205   darrenr 		 * they came from and is not an indication that the packet
    671  1.205   darrenr 		 * is being influenced by source routing options.  This
    672  1.205   darrenr 		 * allows things like
    673  1.205   darrenr 		 * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp"
    674  1.205   darrenr 		 * where tlp0 is both on the 1.1.1.0/24 network and is the
    675  1.205   darrenr 		 * default route for hosts on 1.1.1.0/24.  Of course this
    676  1.205   darrenr 		 * also requires a "map tlp0 ..." to complete the story.
    677  1.205   darrenr 		 * One might argue whether or not this kind of network config.
    678  1.212     perry 		 * should be supported in this manner...
    679  1.205   darrenr 		 */
    680  1.169    itojun 		srcrt = (odst.s_addr != ip->ip_dst.s_addr);
    681  1.127    itojun 	}
    682   1.36       mrg #endif /* PFIL_HOOKS */
    683  1.123   thorpej 
    684  1.123   thorpej #ifdef ALTQ
    685  1.123   thorpej 	/* XXX Temporary until ALTQ is changed to use a pfil hook */
    686  1.123   thorpej 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
    687  1.123   thorpej 		/* packet dropped by traffic conditioner */
    688  1.123   thorpej 		return;
    689  1.123   thorpej 	}
    690  1.123   thorpej #endif
    691  1.121   thorpej 
    692  1.121   thorpej 	/*
    693    1.1       cgd 	 * Process options and, if not destined for us,
    694    1.1       cgd 	 * ship it on.  ip_dooptions returns 1 when an
    695    1.1       cgd 	 * error was detected (causing an icmp message
    696    1.1       cgd 	 * to be sent and the original packet to be freed).
    697    1.1       cgd 	 */
    698    1.1       cgd 	ip_nhops = 0;		/* for source routed packets */
    699    1.1       cgd 	if (hlen > sizeof (struct ip) && ip_dooptions(m))
    700   1.89    itojun 		return;
    701    1.1       cgd 
    702    1.1       cgd 	/*
    703  1.165  christos 	 * Enable a consistency check between the destination address
    704  1.165  christos 	 * and the arrival interface for a unicast packet (the RFC 1122
    705  1.165  christos 	 * strong ES model) if IP forwarding is disabled and the packet
    706  1.165  christos 	 * is not locally generated.
    707  1.165  christos 	 *
    708  1.165  christos 	 * XXX - Checking also should be disabled if the destination
    709  1.165  christos 	 * address is ipnat'ed to a different interface.
    710  1.165  christos 	 *
    711  1.165  christos 	 * XXX - Checking is incompatible with IP aliases added
    712  1.165  christos 	 * to the loopback interface instead of the interface where
    713  1.165  christos 	 * the packets are received.
    714  1.165  christos 	 *
    715  1.165  christos 	 * XXX - We need to add a per ifaddr flag for this so that
    716  1.165  christos 	 * we get finer grain control.
    717  1.165  christos 	 */
    718  1.165  christos 	checkif = ip_checkinterface && (ipforwarding == 0) &&
    719  1.165  christos 	    (m->m_pkthdr.rcvif != NULL) &&
    720  1.165  christos 	    ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0);
    721  1.165  christos 
    722  1.165  christos 	/*
    723    1.1       cgd 	 * Check our list of addresses, to see if the packet is for us.
    724  1.100    itojun 	 *
    725  1.100    itojun 	 * Traditional 4.4BSD did not consult IFF_UP at all.
    726  1.100    itojun 	 * The behavior here is to treat addresses on !IFF_UP interface
    727  1.100    itojun 	 * as not mine.
    728    1.1       cgd 	 */
    729  1.100    itojun 	downmatch = 0;
    730  1.140      matt 	LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
    731   1.97    itojun 		if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
    732  1.165  christos 			if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif)
    733  1.165  christos 				continue;
    734   1.97    itojun 			if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
    735   1.97    itojun 				break;
    736  1.100    itojun 			else
    737  1.100    itojun 				downmatch++;
    738   1.97    itojun 		}
    739   1.97    itojun 	}
    740   1.86   thorpej 	if (ia != NULL)
    741   1.86   thorpej 		goto ours;
    742  1.225  christos 	if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
    743  1.209      matt 		IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) {
    744  1.140      matt 			if (ifa->ifa_addr->sa_family != AF_INET)
    745  1.140      matt 				continue;
    746   1.57       tls 			ia = ifatoia(ifa);
    747   1.35   mycroft 			if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
    748   1.35   mycroft 			    in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
    749   1.20   mycroft 			    /*
    750   1.20   mycroft 			     * Look for all-0's host part (old broadcast addr),
    751   1.20   mycroft 			     * either for subnet or net.
    752   1.20   mycroft 			     */
    753   1.20   mycroft 			    ip->ip_dst.s_addr == ia->ia_subnet ||
    754   1.18   mycroft 			    ip->ip_dst.s_addr == ia->ia_net)
    755    1.1       cgd 				goto ours;
    756   1.57       tls 			/*
    757   1.57       tls 			 * An interface with IP address zero accepts
    758   1.57       tls 			 * all packets that arrive on that interface.
    759   1.57       tls 			 */
    760   1.57       tls 			if (in_nullhost(ia->ia_addr.sin_addr))
    761   1.57       tls 				goto ours;
    762    1.1       cgd 		}
    763    1.1       cgd 	}
    764   1.18   mycroft 	if (IN_MULTICAST(ip->ip_dst.s_addr)) {
    765    1.4   hpeyerl 		struct in_multi *inm;
    766    1.4   hpeyerl #ifdef MROUTING
    767    1.4   hpeyerl 		extern struct socket *ip_mrouter;
    768   1.10    brezak 
    769    1.4   hpeyerl 		if (ip_mrouter) {
    770    1.4   hpeyerl 			/*
    771    1.4   hpeyerl 			 * If we are acting as a multicast router, all
    772    1.4   hpeyerl 			 * incoming multicast packets are passed to the
    773    1.4   hpeyerl 			 * kernel-level multicast forwarding function.
    774    1.4   hpeyerl 			 * The packet is returned (relatively) intact; if
    775    1.4   hpeyerl 			 * ip_mforward() returns a non-zero value, the packet
    776    1.4   hpeyerl 			 * must be discarded, else it may be accepted below.
    777    1.4   hpeyerl 			 *
    778    1.4   hpeyerl 			 * (The IP ident field is put in the same byte order
    779    1.4   hpeyerl 			 * as expected when ip_mforward() is called from
    780    1.4   hpeyerl 			 * ip_output().)
    781    1.4   hpeyerl 			 */
    782   1.13   mycroft 			if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
    783   1.13   mycroft 				ipstat.ips_cantforward++;
    784    1.4   hpeyerl 				m_freem(m);
    785   1.89    itojun 				return;
    786    1.4   hpeyerl 			}
    787    1.4   hpeyerl 
    788    1.4   hpeyerl 			/*
    789    1.4   hpeyerl 			 * The process-level routing demon needs to receive
    790    1.4   hpeyerl 			 * all multicast IGMP packets, whether or not this
    791    1.4   hpeyerl 			 * host belongs to their destination groups.
    792    1.4   hpeyerl 			 */
    793    1.4   hpeyerl 			if (ip->ip_p == IPPROTO_IGMP)
    794    1.4   hpeyerl 				goto ours;
    795   1.13   mycroft 			ipstat.ips_forward++;
    796    1.4   hpeyerl 		}
    797    1.4   hpeyerl #endif
    798    1.4   hpeyerl 		/*
    799    1.4   hpeyerl 		 * See if we belong to the destination multicast group on the
    800    1.4   hpeyerl 		 * arrival interface.
    801    1.4   hpeyerl 		 */
    802    1.4   hpeyerl 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
    803    1.4   hpeyerl 		if (inm == NULL) {
    804   1.13   mycroft 			ipstat.ips_cantforward++;
    805    1.4   hpeyerl 			m_freem(m);
    806   1.89    itojun 			return;
    807    1.4   hpeyerl 		}
    808    1.4   hpeyerl 		goto ours;
    809    1.4   hpeyerl 	}
    810   1.19   mycroft 	if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
    811   1.35   mycroft 	    in_nullhost(ip->ip_dst))
    812    1.1       cgd 		goto ours;
    813    1.1       cgd 
    814    1.1       cgd 	/*
    815    1.1       cgd 	 * Not for us; forward if possible and desirable.
    816    1.1       cgd 	 */
    817    1.1       cgd 	if (ipforwarding == 0) {
    818    1.1       cgd 		ipstat.ips_cantforward++;
    819    1.1       cgd 		m_freem(m);
    820  1.100    itojun 	} else {
    821  1.100    itojun 		/*
    822  1.100    itojun 		 * If ip_dst matched one of my addresses on !IFF_UP interfaces,
    823  1.100    itojun 		 * and there's no IFF_UP interface that matches ip_dst,
    824  1.100    itojun 		 * send icmp unreach.  Forwarding it will result in in-kernel
    825  1.100    itojun 		 * forwarding loop till TTL goes to 0.
    826  1.100    itojun 		 */
    827  1.100    itojun 		if (downmatch) {
    828  1.100    itojun 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
    829  1.100    itojun 			ipstat.ips_cantforward++;
    830  1.100    itojun 			return;
    831  1.100    itojun 		}
    832  1.145    itojun #ifdef IPSEC
    833  1.145    itojun 		if (ipsec4_in_reject(m, NULL)) {
    834  1.145    itojun 			ipsecstat.in_polvio++;
    835  1.145    itojun 			goto bad;
    836  1.145    itojun 		}
    837  1.145    itojun #endif
    838  1.173  jonathan #ifdef FAST_IPSEC
    839  1.173  jonathan 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
    840  1.173  jonathan 		s = splsoftnet();
    841  1.173  jonathan 		if (mtag != NULL) {
    842  1.173  jonathan 			tdbi = (struct tdb_ident *)(mtag + 1);
    843  1.173  jonathan 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
    844  1.173  jonathan 		} else {
    845  1.173  jonathan 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
    846  1.212     perry 						   IP_FORWARDING, &error);
    847  1.173  jonathan 		}
    848  1.173  jonathan 		if (sp == NULL) {	/* NB: can happen if error */
    849  1.173  jonathan 			splx(s);
    850  1.173  jonathan 			/*XXX error stat???*/
    851  1.173  jonathan 			DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
    852  1.173  jonathan 			goto bad;
    853  1.173  jonathan 		}
    854  1.173  jonathan 
    855  1.173  jonathan 		/*
    856  1.173  jonathan 		 * Check security policy against packet attributes.
    857  1.173  jonathan 		 */
    858  1.173  jonathan 		error = ipsec_in_reject(sp, m);
    859  1.173  jonathan 		KEY_FREESP(&sp);
    860  1.173  jonathan 		splx(s);
    861  1.173  jonathan 		if (error) {
    862  1.173  jonathan 			ipstat.ips_cantforward++;
    863  1.173  jonathan 			goto bad;
    864  1.193       scw 		}
    865  1.193       scw 
    866  1.193       scw 		/*
    867  1.193       scw 		 * Peek at the outbound SP for this packet to determine if
    868  1.193       scw 		 * it's a Fast Forward candidate.
    869  1.193       scw 		 */
    870  1.193       scw 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
    871  1.193       scw 		if (mtag != NULL)
    872  1.193       scw 			m->m_flags &= ~M_CANFASTFWD;
    873  1.193       scw 		else {
    874  1.193       scw 			s = splsoftnet();
    875  1.193       scw 			sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND,
    876  1.193       scw 			    (IP_FORWARDING |
    877  1.193       scw 			     (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
    878  1.193       scw 			    &error, NULL);
    879  1.193       scw 			if (sp != NULL) {
    880  1.193       scw 				m->m_flags &= ~M_CANFASTFWD;
    881  1.193       scw 				KEY_FREESP(&sp);
    882  1.193       scw 			}
    883  1.193       scw 			splx(s);
    884  1.173  jonathan 		}
    885  1.173  jonathan #endif	/* FAST_IPSEC */
    886  1.145    itojun 
    887  1.169    itojun 		ip_forward(m, srcrt);
    888  1.100    itojun 	}
    889   1.89    itojun 	return;
    890    1.1       cgd 
    891    1.1       cgd ours:
    892    1.1       cgd 	/*
    893    1.1       cgd 	 * If offset or IP_MF are set, must reassemble.
    894    1.1       cgd 	 * Otherwise, nothing need be done.
    895    1.1       cgd 	 * (We could look in the reassembly queue to see
    896    1.1       cgd 	 * if the packet was previously fragmented,
    897    1.1       cgd 	 * but it's not worth the time; just let them time out.)
    898    1.1       cgd 	 */
    899  1.155    itojun 	if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
    900  1.155    itojun 
    901    1.1       cgd 		/*
    902    1.1       cgd 		 * Look for queue of fragments
    903    1.1       cgd 		 * of this datagram.
    904    1.1       cgd 		 */
    905   1.75   thorpej 		IPQ_LOCK();
    906  1.190  jonathan 		hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
    907  1.190  jonathan 		/* XXX LIST_FOREACH(fp, &ipq[hash], ipq_q) */
    908  1.190  jonathan 		for (fp = LIST_FIRST(&ipq[hash]); fp != NULL;
    909  1.190  jonathan 		     fp = LIST_NEXT(fp, ipq_q)) {
    910    1.1       cgd 			if (ip->ip_id == fp->ipq_id &&
    911   1.35   mycroft 			    in_hosteq(ip->ip_src, fp->ipq_src) &&
    912   1.35   mycroft 			    in_hosteq(ip->ip_dst, fp->ipq_dst) &&
    913    1.1       cgd 			    ip->ip_p == fp->ipq_p)
    914    1.1       cgd 				goto found;
    915  1.190  jonathan 
    916  1.190  jonathan 		}
    917    1.1       cgd 		fp = 0;
    918    1.1       cgd found:
    919    1.1       cgd 
    920    1.1       cgd 		/*
    921    1.1       cgd 		 * Adjust ip_len to not reflect header,
    922   1.25       cgd 		 * set ipqe_mff if more fragments are expected,
    923    1.1       cgd 		 * convert offset of this to bytes.
    924    1.1       cgd 		 */
    925  1.155    itojun 		ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
    926  1.155    itojun 		mff = (ip->ip_off & htons(IP_MF)) != 0;
    927   1.25       cgd 		if (mff) {
    928   1.16       cgd 		        /*
    929   1.16       cgd 		         * Make sure that fragments have a data length
    930   1.16       cgd 			 * that's a non-zero multiple of 8 bytes.
    931   1.16       cgd 		         */
    932  1.155    itojun 			if (ntohs(ip->ip_len) == 0 ||
    933  1.155    itojun 			    (ntohs(ip->ip_len) & 0x7) != 0) {
    934   1.16       cgd 				ipstat.ips_badfrags++;
    935   1.75   thorpej 				IPQ_UNLOCK();
    936   1.16       cgd 				goto bad;
    937   1.16       cgd 			}
    938   1.16       cgd 		}
    939  1.155    itojun 		ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
    940    1.1       cgd 
    941    1.1       cgd 		/*
    942    1.1       cgd 		 * If datagram marked as having more fragments
    943    1.1       cgd 		 * or if this is not the first fragment,
    944    1.1       cgd 		 * attempt reassembly; if it succeeds, proceed.
    945    1.1       cgd 		 */
    946  1.155    itojun 		if (mff || ip->ip_off != htons(0)) {
    947    1.1       cgd 			ipstat.ips_fragments++;
    948  1.233       tls 			s = splvm();
    949   1.72   thorpej 			ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
    950  1.233       tls 			splx(s);
    951   1.25       cgd 			if (ipqe == NULL) {
    952   1.25       cgd 				ipstat.ips_rcvmemdrop++;
    953   1.75   thorpej 				IPQ_UNLOCK();
    954   1.25       cgd 				goto bad;
    955   1.25       cgd 			}
    956   1.25       cgd 			ipqe->ipqe_mff = mff;
    957   1.50   thorpej 			ipqe->ipqe_m = m;
    958   1.25       cgd 			ipqe->ipqe_ip = ip;
    959  1.190  jonathan 			m = ip_reass(ipqe, fp, &ipq[hash]);
    960   1.75   thorpej 			if (m == 0) {
    961   1.75   thorpej 				IPQ_UNLOCK();
    962   1.89    itojun 				return;
    963   1.75   thorpej 			}
    964   1.13   mycroft 			ipstat.ips_reassembled++;
    965   1.50   thorpej 			ip = mtod(m, struct ip *);
    966   1.74   thorpej 			hlen = ip->ip_hl << 2;
    967  1.155    itojun 			ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
    968    1.1       cgd 		} else
    969    1.1       cgd 			if (fp)
    970    1.1       cgd 				ip_freef(fp);
    971   1.75   thorpej 		IPQ_UNLOCK();
    972   1.79   mycroft 	}
    973  1.128    itojun 
    974  1.173  jonathan #if defined(IPSEC)
    975  1.128    itojun 	/*
    976  1.128    itojun 	 * enforce IPsec policy checking if we are seeing last header.
    977  1.128    itojun 	 * note that we do not visit this with protocols with pcb layer
    978  1.128    itojun 	 * code - like udp/tcp/raw ip.
    979  1.128    itojun 	 */
    980  1.128    itojun 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
    981  1.128    itojun 	    ipsec4_in_reject(m, NULL)) {
    982  1.128    itojun 		ipsecstat.in_polvio++;
    983  1.128    itojun 		goto bad;
    984  1.128    itojun 	}
    985  1.128    itojun #endif
    986  1.226  liamjfoy #ifdef FAST_IPSEC
    987  1.173  jonathan 	/*
    988  1.173  jonathan 	 * enforce IPsec policy checking if we are seeing last header.
    989  1.173  jonathan 	 * note that we do not visit this with protocols with pcb layer
    990  1.173  jonathan 	 * code - like udp/tcp/raw ip.
    991  1.173  jonathan 	 */
    992  1.173  jonathan 	if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
    993  1.173  jonathan 		/*
    994  1.173  jonathan 		 * Check if the packet has already had IPsec processing
    995  1.173  jonathan 		 * done.  If so, then just pass it along.  This tag gets
    996  1.173  jonathan 		 * set during AH, ESP, etc. input handling, before the
    997  1.173  jonathan 		 * packet is returned to the ip input queue for delivery.
    998  1.212     perry 		 */
    999  1.173  jonathan 		mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
   1000  1.173  jonathan 		s = splsoftnet();
   1001  1.173  jonathan 		if (mtag != NULL) {
   1002  1.173  jonathan 			tdbi = (struct tdb_ident *)(mtag + 1);
   1003  1.173  jonathan 			sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
   1004  1.173  jonathan 		} else {
   1005  1.173  jonathan 			sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
   1006  1.212     perry 						   IP_FORWARDING, &error);
   1007  1.173  jonathan 		}
   1008  1.173  jonathan 		if (sp != NULL) {
   1009  1.173  jonathan 			/*
   1010  1.173  jonathan 			 * Check security policy against packet attributes.
   1011  1.173  jonathan 			 */
   1012  1.173  jonathan 			error = ipsec_in_reject(sp, m);
   1013  1.173  jonathan 			KEY_FREESP(&sp);
   1014  1.173  jonathan 		} else {
   1015  1.173  jonathan 			/* XXX error stat??? */
   1016  1.173  jonathan 			error = EINVAL;
   1017  1.173  jonathan DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
   1018  1.173  jonathan 			goto bad;
   1019  1.173  jonathan 		}
   1020  1.173  jonathan 		splx(s);
   1021  1.173  jonathan 		if (error)
   1022  1.173  jonathan 			goto bad;
   1023  1.173  jonathan 	}
   1024  1.173  jonathan #endif /* FAST_IPSEC */
   1025    1.1       cgd 
   1026    1.1       cgd 	/*
   1027    1.1       cgd 	 * Switch out to protocol's input routine.
   1028    1.1       cgd 	 */
   1029   1.82     aidan #if IFA_STATS
   1030  1.122    itojun 	if (ia && ip)
   1031  1.155    itojun 		ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
   1032   1.82     aidan #endif
   1033    1.1       cgd 	ipstat.ips_delivered++;
   1034   1.89    itojun     {
   1035   1.89    itojun 	int off = hlen, nh = ip->ip_p;
   1036   1.89    itojun 
   1037   1.89    itojun 	(*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
   1038   1.89    itojun 	return;
   1039   1.89    itojun     }
   1040    1.1       cgd bad:
   1041    1.1       cgd 	m_freem(m);
   1042  1.135   thorpej 	return;
   1043  1.135   thorpej 
   1044  1.135   thorpej badcsum:
   1045  1.135   thorpej 	ipstat.ips_badsum++;
   1046  1.135   thorpej 	m_freem(m);
   1047    1.1       cgd }
   1048    1.1       cgd 
   1049    1.1       cgd /*
   1050    1.1       cgd  * Take incoming datagram fragment and try to
   1051    1.1       cgd  * reassemble it into whole datagram.  If a chain for
   1052    1.1       cgd  * reassembly of this datagram already exists, then it
   1053    1.1       cgd  * is given as fp; otherwise have to make a chain.
   1054    1.1       cgd  */
   1055   1.50   thorpej struct mbuf *
   1056  1.211     perry ip_reass(struct ipqent *ipqe, struct ipq *fp, struct ipqhead *ipqhead)
   1057    1.1       cgd {
   1058  1.109  augustss 	struct mbuf *m = ipqe->ipqe_m;
   1059  1.109  augustss 	struct ipqent *nq, *p, *q;
   1060   1.25       cgd 	struct ip *ip;
   1061    1.1       cgd 	struct mbuf *t;
   1062   1.25       cgd 	int hlen = ipqe->ipqe_ip->ip_hl << 2;
   1063  1.233       tls 	int i, next, s;
   1064    1.1       cgd 
   1065   1.75   thorpej 	IPQ_LOCK_CHECK();
   1066   1.75   thorpej 
   1067    1.1       cgd 	/*
   1068    1.1       cgd 	 * Presence of header sizes in mbufs
   1069    1.1       cgd 	 * would confuse code below.
   1070    1.1       cgd 	 */
   1071    1.1       cgd 	m->m_data += hlen;
   1072    1.1       cgd 	m->m_len -= hlen;
   1073    1.1       cgd 
   1074  1.194  jonathan #ifdef	notyet
   1075  1.194  jonathan 	/* make sure fragment limit is up-to-date */
   1076  1.194  jonathan 	CHECK_NMBCLUSTER_PARAMS();
   1077  1.194  jonathan 
   1078  1.194  jonathan 	/* If we have too many fragments, drop the older half. */
   1079  1.194  jonathan 	if (ip_nfrags >= ip_maxfrags)
   1080  1.194  jonathan 		ip_reass_drophalf(void);
   1081  1.194  jonathan #endif
   1082  1.194  jonathan 
   1083    1.1       cgd 	/*
   1084  1.192  jonathan 	 * We are about to add a fragment; increment frag count.
   1085  1.192  jonathan 	 */
   1086  1.192  jonathan 	ip_nfrags++;
   1087  1.212     perry 
   1088  1.192  jonathan 	/*
   1089    1.1       cgd 	 * If first fragment to arrive, create a reassembly queue.
   1090    1.1       cgd 	 */
   1091    1.1       cgd 	if (fp == 0) {
   1092  1.131    itojun 		/*
   1093  1.131    itojun 		 * Enforce upper bound on number of fragmented packets
   1094  1.131    itojun 		 * for which we attempt reassembly;
   1095  1.131    itojun 		 * If ip_maxfragpackets is 0, never accept fragments.
   1096  1.131    itojun 		 * If it is negative, accept all fragments without limitation.
   1097  1.131    itojun 		 */
   1098  1.131    itojun 		if (ip_maxfragpackets < 0)
   1099  1.131    itojun 			;
   1100  1.131    itojun 		else if (ip_nfragpackets >= ip_maxfragpackets)
   1101  1.131    itojun 			goto dropfrag;
   1102  1.131    itojun 		ip_nfragpackets++;
   1103   1.50   thorpej 		MALLOC(fp, struct ipq *, sizeof (struct ipq),
   1104   1.50   thorpej 		    M_FTABLE, M_NOWAIT);
   1105   1.50   thorpej 		if (fp == NULL)
   1106    1.1       cgd 			goto dropfrag;
   1107  1.190  jonathan 		LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
   1108  1.192  jonathan 		fp->ipq_nfrags = 1;
   1109    1.1       cgd 		fp->ipq_ttl = IPFRAGTTL;
   1110   1.25       cgd 		fp->ipq_p = ipqe->ipqe_ip->ip_p;
   1111   1.25       cgd 		fp->ipq_id = ipqe->ipqe_ip->ip_id;
   1112  1.148      matt 		TAILQ_INIT(&fp->ipq_fragq);
   1113   1.25       cgd 		fp->ipq_src = ipqe->ipqe_ip->ip_src;
   1114   1.25       cgd 		fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
   1115   1.25       cgd 		p = NULL;
   1116    1.1       cgd 		goto insert;
   1117  1.192  jonathan 	} else {
   1118  1.192  jonathan 		fp->ipq_nfrags++;
   1119    1.1       cgd 	}
   1120    1.1       cgd 
   1121    1.1       cgd 	/*
   1122    1.1       cgd 	 * Find a segment which begins after this one does.
   1123    1.1       cgd 	 */
   1124  1.148      matt 	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
   1125  1.148      matt 	    p = q, q = TAILQ_NEXT(q, ipqe_q))
   1126  1.155    itojun 		if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
   1127    1.1       cgd 			break;
   1128    1.1       cgd 
   1129    1.1       cgd 	/*
   1130    1.1       cgd 	 * If there is a preceding segment, it may provide some of
   1131    1.1       cgd 	 * our data already.  If so, drop the data from the incoming
   1132    1.1       cgd 	 * segment.  If it provides all of our data, drop us.
   1133    1.1       cgd 	 */
   1134   1.25       cgd 	if (p != NULL) {
   1135  1.155    itojun 		i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
   1136  1.155    itojun 		    ntohs(ipqe->ipqe_ip->ip_off);
   1137    1.1       cgd 		if (i > 0) {
   1138  1.155    itojun 			if (i >= ntohs(ipqe->ipqe_ip->ip_len))
   1139    1.1       cgd 				goto dropfrag;
   1140   1.50   thorpej 			m_adj(ipqe->ipqe_m, i);
   1141  1.155    itojun 			ipqe->ipqe_ip->ip_off =
   1142  1.155    itojun 			    htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
   1143  1.155    itojun 			ipqe->ipqe_ip->ip_len =
   1144  1.155    itojun 			    htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
   1145    1.1       cgd 		}
   1146    1.1       cgd 	}
   1147    1.1       cgd 
   1148    1.1       cgd 	/*
   1149    1.1       cgd 	 * While we overlap succeeding segments trim them or,
   1150    1.1       cgd 	 * if they are completely covered, dequeue them.
   1151    1.1       cgd 	 */
   1152  1.155    itojun 	for (; q != NULL &&
   1153  1.155    itojun 	    ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
   1154  1.155    itojun 	    ntohs(q->ipqe_ip->ip_off); q = nq) {
   1155  1.155    itojun 		i = (ntohs(ipqe->ipqe_ip->ip_off) +
   1156  1.155    itojun 		    ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
   1157  1.155    itojun 		if (i < ntohs(q->ipqe_ip->ip_len)) {
   1158  1.155    itojun 			q->ipqe_ip->ip_len =
   1159  1.155    itojun 			    htons(ntohs(q->ipqe_ip->ip_len) - i);
   1160  1.155    itojun 			q->ipqe_ip->ip_off =
   1161  1.155    itojun 			    htons(ntohs(q->ipqe_ip->ip_off) + i);
   1162   1.50   thorpej 			m_adj(q->ipqe_m, i);
   1163    1.1       cgd 			break;
   1164    1.1       cgd 		}
   1165  1.148      matt 		nq = TAILQ_NEXT(q, ipqe_q);
   1166   1.50   thorpej 		m_freem(q->ipqe_m);
   1167  1.148      matt 		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
   1168  1.233       tls 		s = splvm();
   1169   1.72   thorpej 		pool_put(&ipqent_pool, q);
   1170  1.233       tls 		splx(s);
   1171  1.192  jonathan 		fp->ipq_nfrags--;
   1172  1.192  jonathan 		ip_nfrags--;
   1173    1.1       cgd 	}
   1174    1.1       cgd 
   1175    1.1       cgd insert:
   1176    1.1       cgd 	/*
   1177    1.1       cgd 	 * Stick new segment in its place;
   1178    1.1       cgd 	 * check for complete reassembly.
   1179    1.1       cgd 	 */
   1180   1.25       cgd 	if (p == NULL) {
   1181  1.148      matt 		TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
   1182   1.25       cgd 	} else {
   1183  1.148      matt 		TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
   1184   1.25       cgd 	}
   1185    1.1       cgd 	next = 0;
   1186  1.148      matt 	for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
   1187  1.148      matt 	    p = q, q = TAILQ_NEXT(q, ipqe_q)) {
   1188  1.155    itojun 		if (ntohs(q->ipqe_ip->ip_off) != next)
   1189    1.1       cgd 			return (0);
   1190  1.155    itojun 		next += ntohs(q->ipqe_ip->ip_len);
   1191    1.1       cgd 	}
   1192   1.25       cgd 	if (p->ipqe_mff)
   1193    1.1       cgd 		return (0);
   1194    1.1       cgd 
   1195    1.1       cgd 	/*
   1196   1.41   thorpej 	 * Reassembly is complete.  Check for a bogus message size and
   1197   1.41   thorpej 	 * concatenate fragments.
   1198    1.1       cgd 	 */
   1199  1.148      matt 	q = TAILQ_FIRST(&fp->ipq_fragq);
   1200   1.25       cgd 	ip = q->ipqe_ip;
   1201   1.41   thorpej 	if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
   1202   1.41   thorpej 		ipstat.ips_toolong++;
   1203   1.41   thorpej 		ip_freef(fp);
   1204   1.41   thorpej 		return (0);
   1205   1.41   thorpej 	}
   1206   1.50   thorpej 	m = q->ipqe_m;
   1207    1.1       cgd 	t = m->m_next;
   1208    1.1       cgd 	m->m_next = 0;
   1209    1.1       cgd 	m_cat(m, t);
   1210  1.148      matt 	nq = TAILQ_NEXT(q, ipqe_q);
   1211  1.233       tls 	s = splvm();
   1212   1.72   thorpej 	pool_put(&ipqent_pool, q);
   1213  1.233       tls 	splx(s);
   1214   1.25       cgd 	for (q = nq; q != NULL; q = nq) {
   1215   1.50   thorpej 		t = q->ipqe_m;
   1216  1.148      matt 		nq = TAILQ_NEXT(q, ipqe_q);
   1217  1.233       tls 		s = splvm();
   1218   1.72   thorpej 		pool_put(&ipqent_pool, q);
   1219  1.233       tls 		splx(s);
   1220    1.1       cgd 		m_cat(m, t);
   1221    1.1       cgd 	}
   1222  1.192  jonathan 	ip_nfrags -= fp->ipq_nfrags;
   1223    1.1       cgd 
   1224    1.1       cgd 	/*
   1225    1.1       cgd 	 * Create header for new ip packet by
   1226    1.1       cgd 	 * modifying header of first packet;
   1227    1.1       cgd 	 * dequeue and discard fragment reassembly header.
   1228    1.1       cgd 	 * Make header visible.
   1229    1.1       cgd 	 */
   1230  1.155    itojun 	ip->ip_len = htons(next);
   1231   1.25       cgd 	ip->ip_src = fp->ipq_src;
   1232   1.25       cgd 	ip->ip_dst = fp->ipq_dst;
   1233   1.25       cgd 	LIST_REMOVE(fp, ipq_q);
   1234   1.50   thorpej 	FREE(fp, M_FTABLE);
   1235  1.131    itojun 	ip_nfragpackets--;
   1236    1.1       cgd 	m->m_len += (ip->ip_hl << 2);
   1237    1.1       cgd 	m->m_data -= (ip->ip_hl << 2);
   1238    1.1       cgd 	/* some debugging cruft by sklower, below, will go away soon */
   1239    1.1       cgd 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
   1240  1.109  augustss 		int plen = 0;
   1241   1.50   thorpej 		for (t = m; t; t = t->m_next)
   1242   1.50   thorpej 			plen += t->m_len;
   1243   1.50   thorpej 		m->m_pkthdr.len = plen;
   1244  1.213      yamt 		m->m_pkthdr.csum_flags = 0;
   1245    1.1       cgd 	}
   1246   1.50   thorpej 	return (m);
   1247    1.1       cgd 
   1248    1.1       cgd dropfrag:
   1249  1.192  jonathan 	if (fp != 0)
   1250  1.192  jonathan 		fp->ipq_nfrags--;
   1251  1.192  jonathan 	ip_nfrags--;
   1252    1.1       cgd 	ipstat.ips_fragdropped++;
   1253    1.1       cgd 	m_freem(m);
   1254  1.233       tls 	s = splvm();
   1255   1.72   thorpej 	pool_put(&ipqent_pool, ipqe);
   1256  1.233       tls 	splx(s);
   1257    1.1       cgd 	return (0);
   1258    1.1       cgd }
   1259    1.1       cgd 
   1260    1.1       cgd /*
   1261    1.1       cgd  * Free a fragment reassembly header and all
   1262    1.1       cgd  * associated datagrams.
   1263    1.1       cgd  */
   1264    1.8   mycroft void
   1265  1.211     perry ip_freef(struct ipq *fp)
   1266    1.1       cgd {
   1267  1.109  augustss 	struct ipqent *q, *p;
   1268  1.192  jonathan 	u_int nfrags = 0;
   1269  1.233       tls 	int s;
   1270    1.1       cgd 
   1271   1.75   thorpej 	IPQ_LOCK_CHECK();
   1272   1.75   thorpej 
   1273  1.148      matt 	for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
   1274  1.148      matt 		p = TAILQ_NEXT(q, ipqe_q);
   1275   1.50   thorpej 		m_freem(q->ipqe_m);
   1276  1.192  jonathan 		nfrags++;
   1277  1.148      matt 		TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
   1278  1.233       tls 		s = splvm();
   1279   1.72   thorpej 		pool_put(&ipqent_pool, q);
   1280  1.233       tls 		splx(s);
   1281    1.1       cgd 	}
   1282  1.192  jonathan 
   1283  1.192  jonathan 	if (nfrags != fp->ipq_nfrags)
   1284  1.192  jonathan 	    printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
   1285  1.192  jonathan 	ip_nfrags -= nfrags;
   1286   1.25       cgd 	LIST_REMOVE(fp, ipq_q);
   1287   1.50   thorpej 	FREE(fp, M_FTABLE);
   1288  1.131    itojun 	ip_nfragpackets--;
   1289    1.1       cgd }
   1290    1.1       cgd 
   1291    1.1       cgd /*
   1292  1.194  jonathan  * IP reassembly TTL machinery for  multiplicative drop.
   1293  1.194  jonathan  */
   1294  1.194  jonathan static u_int	fragttl_histo[(IPFRAGTTL+1)];
   1295  1.194  jonathan 
   1296  1.194  jonathan 
   1297  1.194  jonathan /*
   1298  1.194  jonathan  * Decrement TTL of all reasembly queue entries by `ticks'.
   1299  1.194  jonathan  * Count number of distinct fragments (as opposed to partial, fragmented
   1300  1.194  jonathan  * datagrams) in the reassembly queue.  While we  traverse the entire
   1301  1.194  jonathan  * reassembly queue, compute and return the median TTL over all fragments.
   1302  1.194  jonathan  */
   1303  1.194  jonathan static u_int
   1304  1.194  jonathan ip_reass_ttl_decr(u_int ticks)
   1305  1.194  jonathan {
   1306  1.198      matt 	u_int nfrags, median, dropfraction, keepfraction;
   1307  1.194  jonathan 	struct ipq *fp, *nfp;
   1308  1.198      matt 	int i;
   1309  1.212     perry 
   1310  1.194  jonathan 	nfrags = 0;
   1311  1.194  jonathan 	memset(fragttl_histo, 0, sizeof fragttl_histo);
   1312  1.212     perry 
   1313  1.194  jonathan 	for (i = 0; i < IPREASS_NHASH; i++) {
   1314  1.194  jonathan 		for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) {
   1315  1.194  jonathan 			fp->ipq_ttl = ((fp->ipq_ttl  <= ticks) ?
   1316  1.194  jonathan 				       0 : fp->ipq_ttl - ticks);
   1317  1.194  jonathan 			nfp = LIST_NEXT(fp, ipq_q);
   1318  1.194  jonathan 			if (fp->ipq_ttl == 0) {
   1319  1.194  jonathan 				ipstat.ips_fragtimeout++;
   1320  1.194  jonathan 				ip_freef(fp);
   1321  1.194  jonathan 			} else {
   1322  1.194  jonathan 				nfrags += fp->ipq_nfrags;
   1323  1.194  jonathan 				fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
   1324  1.194  jonathan 			}
   1325  1.194  jonathan 		}
   1326  1.194  jonathan 	}
   1327  1.194  jonathan 
   1328  1.194  jonathan 	KASSERT(ip_nfrags == nfrags);
   1329  1.194  jonathan 
   1330  1.194  jonathan 	/* Find median (or other drop fraction) in histogram. */
   1331  1.194  jonathan 	dropfraction = (ip_nfrags / 2);
   1332  1.194  jonathan 	keepfraction = ip_nfrags - dropfraction;
   1333  1.194  jonathan 	for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
   1334  1.194  jonathan 		median +=  fragttl_histo[i];
   1335  1.194  jonathan 		if (median >= keepfraction)
   1336  1.194  jonathan 			break;
   1337  1.194  jonathan 	}
   1338  1.194  jonathan 
   1339  1.194  jonathan 	/* Return TTL of median (or other fraction). */
   1340  1.194  jonathan 	return (u_int)i;
   1341  1.194  jonathan }
   1342  1.194  jonathan 
   1343  1.194  jonathan void
   1344  1.194  jonathan ip_reass_drophalf(void)
   1345  1.194  jonathan {
   1346  1.194  jonathan 
   1347  1.194  jonathan 	u_int median_ticks;
   1348  1.194  jonathan 	/*
   1349  1.194  jonathan 	 * Compute median TTL of all fragments, and count frags
   1350  1.194  jonathan 	 * with that TTL or lower (roughly half of all fragments).
   1351  1.194  jonathan 	 */
   1352  1.194  jonathan 	median_ticks = ip_reass_ttl_decr(0);
   1353  1.194  jonathan 
   1354  1.194  jonathan 	/* Drop half. */
   1355  1.194  jonathan 	median_ticks = ip_reass_ttl_decr(median_ticks);
   1356  1.194  jonathan 
   1357  1.194  jonathan }
   1358  1.194  jonathan 
   1359  1.194  jonathan /*
   1360    1.1       cgd  * IP timer processing;
   1361    1.1       cgd  * if a timer expires on a reassembly
   1362    1.1       cgd  * queue, discard it.
   1363    1.1       cgd  */
   1364    1.8   mycroft void
   1365  1.211     perry ip_slowtimo(void)
   1366    1.1       cgd {
   1367  1.191  jonathan 	static u_int dropscanidx = 0;
   1368  1.191  jonathan 	u_int i;
   1369  1.194  jonathan 	u_int median_ttl;
   1370   1.24   mycroft 	int s = splsoftnet();
   1371    1.1       cgd 
   1372   1.75   thorpej 	IPQ_LOCK();
   1373  1.194  jonathan 
   1374  1.194  jonathan 	/* Age TTL of all fragments by 1 tick .*/
   1375  1.194  jonathan 	median_ttl = ip_reass_ttl_decr(1);
   1376  1.194  jonathan 
   1377  1.194  jonathan 	/* make sure fragment limit is up-to-date */
   1378  1.194  jonathan 	CHECK_NMBCLUSTER_PARAMS();
   1379  1.194  jonathan 
   1380  1.194  jonathan 	/* If we have too many fragments, drop the older half. */
   1381  1.194  jonathan 	if (ip_nfrags > ip_maxfrags)
   1382  1.194  jonathan 		ip_reass_ttl_decr(median_ttl);
   1383  1.194  jonathan 
   1384  1.131    itojun 	/*
   1385  1.194  jonathan 	 * If we are over the maximum number of fragmented packets
   1386  1.131    itojun 	 * (due to the limit being lowered), drain off
   1387  1.190  jonathan 	 * enough to get down to the new limit. Start draining
   1388  1.190  jonathan 	 * from the reassembly hashqueue most recently drained.
   1389  1.131    itojun 	 */
   1390  1.131    itojun 	if (ip_maxfragpackets < 0)
   1391  1.131    itojun 		;
   1392  1.131    itojun 	else {
   1393  1.190  jonathan 		int wrapped = 0;
   1394  1.190  jonathan 
   1395  1.190  jonathan 		i = dropscanidx;
   1396  1.190  jonathan 		while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
   1397  1.190  jonathan 			while (LIST_FIRST(&ipq[i]) != NULL)
   1398  1.190  jonathan 				ip_freef(LIST_FIRST(&ipq[i]));
   1399  1.190  jonathan 			if (++i >= IPREASS_NHASH) {
   1400  1.190  jonathan 				i = 0;
   1401  1.190  jonathan 			}
   1402  1.190  jonathan 			/*
   1403  1.190  jonathan 			 * Dont scan forever even if fragment counters are
   1404  1.190  jonathan 			 * wrong: stop after scanning entire reassembly queue.
   1405  1.190  jonathan 			 */
   1406  1.190  jonathan 			if (i == dropscanidx)
   1407  1.190  jonathan 			    wrapped = 1;
   1408  1.190  jonathan 		}
   1409  1.190  jonathan 		dropscanidx = i;
   1410  1.131    itojun 	}
   1411   1.75   thorpej 	IPQ_UNLOCK();
   1412    1.1       cgd 	splx(s);
   1413    1.1       cgd }
   1414    1.1       cgd 
/*
 * Drain off datagram fragments to release memory.
 */
void
ip_drain(void)
{

	/*
	 * We may be called from a device's interrupt context, so only
	 * try for the ipq lock; if it is already busy, do nothing.
	 *
	 * When the lock is obtained, drop half of the total fragments.
	 * If more mbufs are needed, we will be called again soon.
	 */
	if (ipq_lock_try() != 0) {
		ip_reass_drophalf();
		IPQ_UNLOCK();
	}
}
   1437    1.1       cgd 
   1438    1.1       cgd /*
   1439    1.1       cgd  * Do option processing on a datagram,
   1440    1.1       cgd  * possibly discarding it if bad options are encountered,
   1441    1.1       cgd  * or forwarding it if source-routed.
   1442    1.1       cgd  * Returns 1 if packet has been forwarded/freed,
   1443    1.1       cgd  * 0 if the packet should be processed further.
                          *
                          * The option bytes that follow the fixed IP header are walked one
                          * option at a time.  A malformed option, or a source route that is
                          * refused (ip_allowsrcrt / ip_forwsrcrt are zero) or cannot be
                          * followed, jumps to `bad', which passes the mbuf to icmp_error()
                          * with the type/code chosen at the failure site and counts the packet
                          * in ipstat.ips_badoptions.  If a source-route option installed a
                          * new, non-multicast destination, the packet is handed to
                          * ip_forward() after all options have been processed.
   1444    1.1       cgd  */
   1445    1.8   mycroft int
   1446  1.211     perry ip_dooptions(struct mbuf *m)
   1447    1.1       cgd {
   1448  1.109  augustss 	struct ip *ip = mtod(m, struct ip *);
   1449  1.109  augustss 	u_char *cp, *cp0;
   1450  1.109  augustss 	struct ip_timestamp *ipt;
   1451  1.109  augustss 	struct in_ifaddr *ia;
   1452    1.1       cgd 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
   1453  1.104   thorpej 	struct in_addr dst;
   1454    1.1       cgd 	n_time ntime;
   1455    1.1       cgd 
                         	/*
                         	 * Remember the destination as received: the source-route case
                         	 * below may overwrite ip->ip_dst, and the timestamp case uses
                         	 * this saved copy.
                         	 */
   1456   1.13   mycroft 	dst = ip->ip_dst;
   1457    1.1       cgd 	cp = (u_char *)(ip + 1);
                         	/* Option bytes = header length (ip_hl << 2) minus the fixed header. */
   1458    1.1       cgd 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
   1459    1.1       cgd 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
   1460    1.1       cgd 		opt = cp[IPOPT_OPTVAL];
   1461    1.1       cgd 		if (opt == IPOPT_EOL)
   1462    1.1       cgd 			break;
                         		/* NOP is treated as a single byte with no length field. */
   1463    1.1       cgd 		if (opt == IPOPT_NOP)
   1464    1.1       cgd 			optlen = 1;
   1465    1.1       cgd 		else {
                         			/* The length byte itself must lie within what remains. */
   1466  1.113    itojun 			if (cnt < IPOPT_OLEN + sizeof(*cp)) {
   1467  1.113    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1468  1.113    itojun 				goto bad;
   1469  1.113    itojun 			}
   1470    1.1       cgd 			optlen = cp[IPOPT_OLEN];
                         			/*
                         			 * The claimed length must cover at least the type and
                         			 * length bytes and must not run past the remaining
                         			 * option space.
                         			 */
   1471  1.114    itojun 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
   1472    1.1       cgd 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1473    1.1       cgd 				goto bad;
   1474    1.1       cgd 			}
   1475    1.1       cgd 		}
   1476    1.1       cgd 		switch (opt) {
   1477    1.1       cgd 
   1478    1.1       cgd 		default:
   1479    1.1       cgd 			break;
   1480    1.1       cgd 
   1481    1.1       cgd 		/*
   1482    1.1       cgd 		 * Source routing with record.
   1483    1.1       cgd 		 * Find interface with current destination address.
   1484    1.1       cgd 		 * If none on this machine then drop if strictly routed,
   1485    1.1       cgd 		 * or do nothing if loosely routed.
   1486    1.1       cgd 		 * Record interface address and bring up next address
   1487    1.1       cgd 		 * component.  If strictly routed make sure next
   1488    1.1       cgd 		 * address is on directly accessible net.
   1489    1.1       cgd 		 */
   1490    1.1       cgd 		case IPOPT_LSRR:
   1491    1.1       cgd 		case IPOPT_SSRR:
   1492   1.47       cjs 			if (ip_allowsrcrt == 0) {
   1493   1.47       cjs 				type = ICMP_UNREACH;
   1494   1.47       cjs 				code = ICMP_UNREACH_NET_PROHIB;
   1495   1.47       cjs 				goto bad;
   1496   1.47       cjs 			}
                         			/* The option must be long enough to hold its offset byte. */
   1497  1.114    itojun 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1498  1.114    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1499  1.114    itojun 				goto bad;
   1500  1.114    itojun 			}
   1501    1.1       cgd 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1502    1.1       cgd 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1503    1.1       cgd 				goto bad;
   1504    1.1       cgd 			}
   1505    1.1       cgd 			ipaddr.sin_addr = ip->ip_dst;
   1506   1.19   mycroft 			ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
   1507    1.1       cgd 			if (ia == 0) {
   1508    1.1       cgd 				if (opt == IPOPT_SSRR) {
   1509    1.1       cgd 					type = ICMP_UNREACH;
   1510    1.1       cgd 					code = ICMP_UNREACH_SRCFAIL;
   1511    1.1       cgd 					goto bad;
   1512    1.1       cgd 				}
   1513    1.1       cgd 				/*
   1514    1.1       cgd 				 * Loose routing, and not at next destination
   1515    1.1       cgd 				 * yet; nothing to do except forward.
   1516    1.1       cgd 				 */
   1517    1.1       cgd 				break;
   1518    1.1       cgd 			}
   1519    1.1       cgd 			off--;			/* 0 origin */
   1520  1.112  sommerfe 			if ((off + sizeof(struct in_addr)) > optlen) {
   1521    1.1       cgd 				/*
   1522    1.1       cgd 				 * End of source route.  Should be for us.
   1523    1.1       cgd 				 */
   1524    1.1       cgd 				save_rte(cp, ip->ip_src);
   1525    1.1       cgd 				break;
   1526    1.1       cgd 			}
   1527    1.1       cgd 			/*
   1528    1.1       cgd 			 * locate outgoing interface
   1529    1.1       cgd 			 */
   1530  1.244  christos 			bcopy((void *)(cp + off), (void *)&ipaddr.sin_addr,
   1531    1.1       cgd 			    sizeof(ipaddr.sin_addr));
   1532   1.96   thorpej 			if (opt == IPOPT_SSRR)
   1533  1.196    itojun 				ia = ifatoia(ifa_ifwithladdr(sintosa(&ipaddr)));
   1534   1.96   thorpej 			else
   1535    1.1       cgd 				ia = ip_rtaddr(ipaddr.sin_addr);
   1536    1.1       cgd 			if (ia == 0) {
   1537    1.1       cgd 				type = ICMP_UNREACH;
   1538    1.1       cgd 				code = ICMP_UNREACH_SRCFAIL;
   1539    1.1       cgd 				goto bad;
   1540    1.1       cgd 			}
                         			/*
                         			 * The next hop becomes the packet's destination; the
                         			 * slot it was read from is overwritten with the chosen
                         			 * interface's address and the option pointer advances
                         			 * past it.
                         			 */
   1541    1.1       cgd 			ip->ip_dst = ipaddr.sin_addr;
   1542  1.244  christos 			bcopy((void *)&ia->ia_addr.sin_addr,
   1543  1.244  christos 			    (void *)(cp + off), sizeof(struct in_addr));
   1544    1.1       cgd 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1545   1.13   mycroft 			/*
   1546   1.13   mycroft 			 * Let ip_intr's mcast routing check handle mcast pkts
   1547   1.13   mycroft 			 */
   1548   1.18   mycroft 			forward = !IN_MULTICAST(ip->ip_dst.s_addr);
   1549    1.1       cgd 			break;
   1550    1.1       cgd 
                         		/*
                         		 * Record route: if a slot remains, store the address of
                         		 * the interface found for the current destination and
                         		 * advance the option pointer.
                         		 */
   1551    1.1       cgd 		case IPOPT_RR:
   1552  1.114    itojun 			if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
   1553  1.114    itojun 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
   1554  1.114    itojun 				goto bad;
   1555  1.114    itojun 			}
   1556    1.1       cgd 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
   1557    1.1       cgd 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
   1558    1.1       cgd 				goto bad;
   1559    1.1       cgd 			}
   1560    1.1       cgd 			/*
   1561    1.1       cgd 			 * If no space remains, ignore.
   1562    1.1       cgd 			 */
   1563    1.1       cgd 			off--;			/* 0 origin */
   1564  1.112  sommerfe 			if ((off + sizeof(struct in_addr)) > optlen)
   1565    1.1       cgd 				break;
   1566  1.244  christos 			bcopy((void *)(&ip->ip_dst), (void *)&ipaddr.sin_addr,
   1567    1.1       cgd 			    sizeof(ipaddr.sin_addr));
   1568    1.1       cgd 			/*
   1569    1.1       cgd 			 * locate outgoing interface; if we're the destination,
   1570    1.1       cgd 			 * use the incoming interface (should be same).
   1571    1.1       cgd 			 */
   1572   1.96   thorpej 			if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
   1573   1.96   thorpej 			    == NULL &&
   1574   1.96   thorpej 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
   1575    1.1       cgd 				type = ICMP_UNREACH;
   1576    1.1       cgd 				code = ICMP_UNREACH_HOST;
   1577    1.1       cgd 				goto bad;
   1578    1.1       cgd 			}
   1579  1.244  christos 			bcopy((void *)&ia->ia_addr.sin_addr,
   1580  1.244  christos 			    (void *)(cp + off), sizeof(struct in_addr));
   1581    1.1       cgd 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
   1582    1.1       cgd 			break;
   1583    1.1       cgd 
                         		/*
                         		 * Timestamp: validate length and pointer, optionally
                         		 * record or match an address depending on ipt_flg, then
                         		 * write the current time at the pointer.
                         		 */
   1584    1.1       cgd 		case IPOPT_TS:
   1585    1.1       cgd 			code = cp - (u_char *)ip;
   1586    1.1       cgd 			ipt = (struct ip_timestamp *)cp;
   1587  1.114    itojun 			if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
   1588  1.114    itojun 				code = (u_char *)&ipt->ipt_len - (u_char *)ip;
   1589    1.1       cgd 				goto bad;
   1590  1.114    itojun 			}
   1591  1.114    itojun 			if (ipt->ipt_ptr < 5) {
   1592  1.114    itojun 				code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
   1593  1.114    itojun 				goto bad;
   1594  1.114    itojun 			}
                         			/*
                         			 * No room left for another timestamp: bump the
                         			 * overflow counter and reject the packet only if
                         			 * that counter wraps to zero.
                         			 */
   1595   1.15       cgd 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
   1596  1.114    itojun 				if (++ipt->ipt_oflw == 0) {
   1597  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1598  1.114    itojun 					    (u_char *)ip;
   1599    1.1       cgd 					goto bad;
   1600  1.114    itojun 				}
   1601    1.1       cgd 				break;
   1602    1.1       cgd 			}
                         			/* cp0 addresses the slot at ipt_ptr, which is 1-origin. */
   1603  1.104   thorpej 			cp0 = (cp + ipt->ipt_ptr - 1);
   1604    1.1       cgd 			switch (ipt->ipt_flg) {
   1605    1.1       cgd 
   1606    1.1       cgd 			case IPOPT_TS_TSONLY:
   1607    1.1       cgd 				break;
   1608    1.1       cgd 
   1609    1.1       cgd 			case IPOPT_TS_TSANDADDR:
   1610   1.66   thorpej 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1611  1.114    itojun 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1612  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1613  1.114    itojun 					    (u_char *)ip;
   1614    1.1       cgd 					goto bad;
   1615  1.114    itojun 				}
   1616   1.13   mycroft 				ipaddr.sin_addr = dst;
   1617   1.96   thorpej 				ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
   1618   1.96   thorpej 				    m->m_pkthdr.rcvif));
                         				/*
                         				 * `continue' applies to the enclosing for
                         				 * loop: move on to the next option without
                         				 * recording anything in this one.
                         				 */
   1619   1.13   mycroft 				if (ia == 0)
   1620   1.13   mycroft 					continue;
   1621  1.104   thorpej 				bcopy(&ia->ia_addr.sin_addr,
   1622  1.104   thorpej 				    cp0, sizeof(struct in_addr));
   1623    1.1       cgd 				ipt->ipt_ptr += sizeof(struct in_addr);
   1624    1.1       cgd 				break;
   1625    1.1       cgd 
   1626    1.1       cgd 			case IPOPT_TS_PRESPEC:
   1627   1.66   thorpej 				if (ipt->ipt_ptr - 1 + sizeof(n_time) +
   1628  1.114    itojun 				    sizeof(struct in_addr) > ipt->ipt_len) {
   1629  1.114    itojun 					code = (u_char *)&ipt->ipt_ptr -
   1630  1.114    itojun 					    (u_char *)ip;
   1631    1.1       cgd 					goto bad;
   1632  1.114    itojun 				}
   1633  1.104   thorpej 				bcopy(cp0, &ipaddr.sin_addr,
   1634    1.1       cgd 				    sizeof(struct in_addr));
                         				/*
                         				 * The prespecified address is not one of
                         				 * ours: skip to the next option without
                         				 * writing a timestamp.
                         				 */
   1635   1.96   thorpej 				if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
   1636   1.96   thorpej 				    == NULL)
   1637    1.1       cgd 					continue;
   1638    1.1       cgd 				ipt->ipt_ptr += sizeof(struct in_addr);
   1639    1.1       cgd 				break;
   1640    1.1       cgd 
   1641    1.1       cgd 			default:
   1642  1.114    itojun 				/* XXX can't take &ipt->ipt_flg */
   1643  1.114    itojun 				code = (u_char *)&ipt->ipt_ptr -
   1644  1.114    itojun 				    (u_char *)ip + 1;
   1645    1.1       cgd 				goto bad;
   1646    1.1       cgd 			}
                         			/*
                         			 * Write the current time at the (possibly advanced)
                         			 * pointer and step the pointer past it.
                         			 */
   1647    1.1       cgd 			ntime = iptime();
   1648  1.107   thorpej 			cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
   1649  1.244  christos 			memmove((char *)cp + ipt->ipt_ptr - 1, cp0,
   1650    1.1       cgd 			    sizeof(n_time));
   1651    1.1       cgd 			ipt->ipt_ptr += sizeof(n_time);
   1652    1.1       cgd 		}
   1653    1.1       cgd 	}
                         	/*
                         	 * A source route picked a new destination: forward the packet
                         	 * unless ip_forwsrcrt is zero, in which case it is rejected.
                         	 */
   1654    1.1       cgd 	if (forward) {
   1655   1.26   thorpej 		if (ip_forwsrcrt == 0) {
   1656   1.26   thorpej 			type = ICMP_UNREACH;
   1657   1.26   thorpej 			code = ICMP_UNREACH_SRCFAIL;
   1658   1.26   thorpej 			goto bad;
   1659   1.26   thorpej 		}
   1660    1.1       cgd 		ip_forward(m, 1);
   1661    1.1       cgd 		return (1);
   1662   1.13   mycroft 	}
   1663   1.13   mycroft 	return (0);
                         	/* Error exit: report via ICMP with the type/code set above. */
   1664    1.1       cgd bad:
   1665   1.13   mycroft 	icmp_error(m, type, code, 0, 0);
   1666   1.13   mycroft 	ipstat.ips_badoptions++;
   1667    1.1       cgd 	return (1);
   1668    1.1       cgd }
   1669    1.1       cgd 
   1670    1.1       cgd /*
   1671    1.1       cgd  * Given address of next destination (final or next hop),
   1672    1.1       cgd  * return internet address info of interface to be used to get there.
   1673    1.1       cgd  */
   1674    1.1       cgd struct in_ifaddr *
   1675  1.211     perry ip_rtaddr(struct in_addr dst)
   1676    1.1       cgd {
   1677  1.243    dyoung 	if (!in_hosteq(dst, satocsin(rtcache_getdst(&ipforward_rt))->sin_addr))
   1678  1.240     joerg 		rtcache_free(&ipforward_rt);
   1679  1.240     joerg 	else
   1680  1.240     joerg 		rtcache_check(&ipforward_rt);
   1681  1.243    dyoung 
   1682  1.240     joerg 	if (ipforward_rt.ro_rt == NULL) {
   1683  1.243    dyoung 		struct sockaddr_in *sin = satosin(&ipforward_rt.ro_dst);
   1684  1.243    dyoung 
   1685    1.1       cgd 		sin->sin_family = AF_INET;
   1686    1.1       cgd 		sin->sin_len = sizeof(*sin);
   1687    1.1       cgd 		sin->sin_addr = dst;
   1688    1.1       cgd 
   1689  1.240     joerg 		rtcache_init(&ipforward_rt);
   1690  1.240     joerg 		if (ipforward_rt.ro_rt == NULL)
   1691  1.240     joerg 			return NULL;
   1692    1.1       cgd 	}
   1693  1.242    dyoung 	return ifatoia(ipforward_rt.ro_rt->rt_ifa);
   1694    1.1       cgd }
   1695    1.1       cgd 
   1696    1.1       cgd /*
   1697    1.1       cgd  * Save incoming source route for use in replies,
   1698    1.1       cgd  * to be picked up later by ip_srcroute if the receiver is interested.
   1699    1.1       cgd  */
   1700   1.13   mycroft void
   1701  1.211     perry save_rte(u_char *option, struct in_addr dst)
   1702    1.1       cgd {
   1703    1.1       cgd 	unsigned olen;
   1704    1.1       cgd 
   1705    1.1       cgd 	olen = option[IPOPT_OLEN];
   1706    1.1       cgd #ifdef DIAGNOSTIC
   1707    1.1       cgd 	if (ipprintfs)
   1708   1.39  christos 		printf("save_rte: olen %d\n", olen);
   1709   1.89    itojun #endif /* 0 */
   1710    1.1       cgd 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
   1711    1.1       cgd 		return;
   1712  1.244  christos 	bcopy((void *)option, (void *)ip_srcrt.srcopt, olen);
   1713    1.1       cgd 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
   1714    1.1       cgd 	ip_srcrt.dst = dst;
   1715    1.1       cgd }
   1716    1.1       cgd 
   1717    1.1       cgd /*
   1718    1.1       cgd  * Retrieve incoming source route for use in replies,
   1719    1.1       cgd  * in the same form used by setsockopt.
   1720    1.1       cgd  * The first hop is placed before the options, will be removed later.
   1721    1.1       cgd  */
   1722    1.1       cgd struct mbuf *
   1723  1.211     perry ip_srcroute(void)
   1724    1.1       cgd {
   1725  1.109  augustss 	struct in_addr *p, *q;
   1726  1.109  augustss 	struct mbuf *m;
   1727    1.1       cgd 
   1728    1.1       cgd 	if (ip_nhops == 0)
   1729  1.237    dyoung 		return NULL;
   1730    1.1       cgd 	m = m_get(M_DONTWAIT, MT_SOOPTS);
   1731    1.1       cgd 	if (m == 0)
   1732  1.237    dyoung 		return NULL;
   1733    1.1       cgd 
   1734  1.164      matt 	MCLAIM(m, &inetdomain.dom_mowner);
   1735   1.13   mycroft #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
   1736    1.1       cgd 
   1737    1.1       cgd 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
   1738    1.1       cgd 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
   1739    1.1       cgd 	    OPTSIZ;
   1740    1.1       cgd #ifdef DIAGNOSTIC
   1741    1.1       cgd 	if (ipprintfs)
   1742   1.39  christos 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
   1743    1.1       cgd #endif
   1744    1.1       cgd 
   1745    1.1       cgd 	/*
   1746    1.1       cgd 	 * First save first hop for return route
   1747    1.1       cgd 	 */
   1748    1.1       cgd 	p = &ip_srcrt.route[ip_nhops - 1];
   1749    1.1       cgd 	*(mtod(m, struct in_addr *)) = *p--;
   1750    1.1       cgd #ifdef DIAGNOSTIC
   1751    1.1       cgd 	if (ipprintfs)
   1752   1.39  christos 		printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
   1753    1.1       cgd #endif
   1754    1.1       cgd 
   1755    1.1       cgd 	/*
   1756    1.1       cgd 	 * Copy option fields and padding (nop) to mbuf.
   1757    1.1       cgd 	 */
   1758    1.1       cgd 	ip_srcrt.nop = IPOPT_NOP;
   1759    1.1       cgd 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
   1760  1.244  christos 	memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop,
   1761  1.244  christos 	    OPTSIZ);
   1762  1.244  christos 	q = (struct in_addr *)(mtod(m, char *) +
   1763    1.1       cgd 	    sizeof(struct in_addr) + OPTSIZ);
   1764    1.1       cgd #undef OPTSIZ
   1765    1.1       cgd 	/*
   1766    1.1       cgd 	 * Record return path as an IP source route,
   1767    1.1       cgd 	 * reversing the path (pointers are now aligned).
   1768    1.1       cgd 	 */
   1769    1.1       cgd 	while (p >= ip_srcrt.route) {
   1770    1.1       cgd #ifdef DIAGNOSTIC
   1771    1.1       cgd 		if (ipprintfs)
   1772   1.39  christos 			printf(" %x", ntohl(q->s_addr));
   1773    1.1       cgd #endif
   1774    1.1       cgd 		*q++ = *p--;
   1775    1.1       cgd 	}
   1776    1.1       cgd 	/*
   1777    1.1       cgd 	 * Last hop goes to final destination.
   1778    1.1       cgd 	 */
   1779    1.1       cgd 	*q = ip_srcrt.dst;
   1780    1.1       cgd #ifdef DIAGNOSTIC
   1781    1.1       cgd 	if (ipprintfs)
   1782   1.39  christos 		printf(" %x\n", ntohl(q->s_addr));
   1783    1.1       cgd #endif
   1784    1.1       cgd 	return (m);
   1785    1.1       cgd }
   1786    1.1       cgd 
   1787    1.1       cgd /*
   1788    1.1       cgd  * Strip out IP options, at higher
   1789    1.1       cgd  * level protocol in the kernel.
   1790    1.1       cgd  * Second argument is buffer to which options
   1791    1.1       cgd  * will be moved, and return value is their length.
   1792    1.1       cgd  * XXX should be deleted; last arg currently ignored.
   1793    1.1       cgd  */
   1794    1.8   mycroft void
   1795  1.236  christos ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
   1796    1.1       cgd {
   1797  1.109  augustss 	int i;
   1798    1.1       cgd 	struct ip *ip = mtod(m, struct ip *);
   1799  1.244  christos 	void *opts;
   1800    1.1       cgd 	int olen;
   1801    1.1       cgd 
   1802   1.79   mycroft 	olen = (ip->ip_hl << 2) - sizeof (struct ip);
   1803  1.244  christos 	opts = (void *)(ip + 1);
   1804    1.1       cgd 	i = m->m_len - (sizeof (struct ip) + olen);
   1805  1.244  christos 	memmove(opts, (char *)opts + olen, (unsigned)i);
   1806    1.1       cgd 	m->m_len -= olen;
   1807    1.1       cgd 	if (m->m_flags & M_PKTHDR)
   1808    1.1       cgd 		m->m_pkthdr.len -= olen;
   1809  1.155    itojun 	ip->ip_len = htons(ntohs(ip->ip_len) - olen);
   1810   1.79   mycroft 	ip->ip_hl = sizeof (struct ip) >> 2;
   1811    1.1       cgd }
   1812    1.1       cgd 
/*
 * Error numbers for the PRC_NCMDS control codes, one entry per code in
 * numeric order; the leading comment on each line is the array index.
 * NOTE(review): presumably indexed by the PRC_* constants -- confirm
 * against their definitions.
 */
const int inetctlerrmap[PRC_NCMDS] = {
	/*  0 */ 0,
	/*  1 */ 0,
	/*  2 */ 0,
	/*  3 */ 0,
	/*  4 */ 0,
	/*  5 */ EMSGSIZE,
	/*  6 */ EHOSTDOWN,
	/*  7 */ EHOSTUNREACH,
	/*  8 */ EHOSTUNREACH,
	/*  9 */ EHOSTUNREACH,
	/* 10 */ ECONNREFUSED,
	/* 11 */ ECONNREFUSED,
	/* 12 */ EMSGSIZE,
	/* 13 */ EHOSTUNREACH,
	/* 14 */ 0,
	/* 15 */ 0,
	/* 16 */ 0,
	/* 17 */ 0,
	/* 18 */ 0,
	/* 19 */ 0,
	/* 20 */ ENOPROTOOPT
};
   1821    1.1       cgd 
   1822    1.1       cgd /*
   1823    1.1       cgd  * Forward a packet.  If some error occurs return the sender
   1824    1.1       cgd  * an icmp packet.  Note we can't always generate a meaningful
   1825    1.1       cgd  * icmp message because icmp doesn't have a large enough repertoire
   1826    1.1       cgd  * of codes and types.
   1827    1.1       cgd  *
   1828    1.1       cgd  * If not forwarding, just drop the packet.  This could be confusing
   1829    1.1       cgd  * if ipforwarding was zero but some routing protocol was advertising
   1830    1.1       cgd  * us as a gateway to somewhere.  However, we must let the routing
   1831    1.1       cgd  * protocol deal with that.
   1832    1.1       cgd  *
   1833    1.1       cgd  * The srcrt parameter indicates whether the packet is being forwarded
   1834    1.1       cgd  * via a source route.
   1835    1.1       cgd  */
   1836   1.13   mycroft void
   1837  1.211     perry ip_forward(struct mbuf *m, int srcrt)
   1838    1.1       cgd {
                         	/*
                         	 * m     - packet to forward.  Every path below hands it to
                         	 *         m_freem(), icmp_error() or ip_output(); it is not
                         	 *         referenced again afterwards.
                         	 * srcrt - when non-zero, the ICMP redirect check is skipped.
                         	 * Nothing is returned; ipstat counters are updated along the way.
                         	 */
   1839  1.109  augustss 	struct ip *ip = mtod(m, struct ip *);
   1840  1.109  augustss 	struct rtentry *rt;
   1841  1.220  christos 	int error, type = 0, code = 0, destmtu = 0;
   1842    1.1       cgd 	struct mbuf *mcopy;
   1843   1.13   mycroft 	n_long dest;
   1844  1.164      matt 
   1845  1.164      matt 	/*
   1846  1.164      matt 	 * We are now in the output path.
   1847  1.164      matt 	 */
   1848  1.164      matt 	MCLAIM(m, &ip_tx_mowner);
   1849  1.135   thorpej 
   1850  1.135   thorpej 	/*
   1851  1.135   thorpej 	 * Clear any in-bound checksum flags for this packet.
   1852  1.135   thorpej 	 */
   1853  1.135   thorpej 	m->m_pkthdr.csum_flags = 0;
   1854    1.1       cgd 
   1855   1.13   mycroft 	dest = 0;
   1856    1.1       cgd #ifdef DIAGNOSTIC
   1857  1.224     joerg 	if (ipprintfs) {
   1858  1.224     joerg 		printf("forward: src %s ", inet_ntoa(ip->ip_src));
   1859  1.224     joerg 		printf("dst %s ttl %x\n", inet_ntoa(ip->ip_dst), ip->ip_ttl);
   1860  1.224     joerg 	}
   1861    1.1       cgd #endif
                         	/*
                         	 * Packets flagged M_BCAST/M_MCAST, and destinations that
                         	 * in_canforward() rejects, are counted and dropped.
                         	 */
   1862   1.93  sommerfe 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
   1863    1.1       cgd 		ipstat.ips_cantforward++;
   1864    1.1       cgd 		m_freem(m);
   1865    1.1       cgd 		return;
   1866    1.1       cgd 	}
                         	/*
                         	 * The TTL would not stay above zero after the IPTTLDEC
                         	 * decrement below: report "time exceeded in transit".
                         	 * m is not freed here, so icmp_error() presumably takes
                         	 * ownership of it -- TODO confirm against icmp_error().
                         	 */
   1867    1.1       cgd 	if (ip->ip_ttl <= IPTTLDEC) {
   1868   1.13   mycroft 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
   1869    1.1       cgd 		return;
   1870    1.1       cgd 	}
   1871    1.1       cgd 
                         	/*
                         	 * Discard the cached forwarding route if it was for another
                         	 * destination, otherwise let rtcache_check() vet it
                         	 * (presumably a revalidation).  If no route is cached after
                         	 * that, look one up for ip_dst; failure is reported to the
                         	 * sender as "net unreachable".
                         	 */
   1872  1.243    dyoung 	if (!in_hosteq(ip->ip_dst,
   1873  1.243    dyoung 	               satocsin(rtcache_getdst(&ipforward_rt))->sin_addr))
   1874  1.240     joerg 		rtcache_free(&ipforward_rt);
   1875  1.240     joerg 	else
   1876  1.240     joerg 		rtcache_check(&ipforward_rt);
   1877  1.240     joerg 	if (ipforward_rt.ro_rt == NULL) {
   1878  1.243    dyoung 		struct sockaddr_in *sin = satosin(&ipforward_rt.ro_dst);
   1879  1.243    dyoung 
   1880    1.1       cgd 		sin->sin_family = AF_INET;
   1881  1.242    dyoung 		sin->sin_len = sizeof(*sin);
   1882    1.1       cgd 		sin->sin_addr = ip->ip_dst;
   1883    1.1       cgd 
   1884  1.240     joerg 		rtcache_init(&ipforward_rt);
   1885  1.239    dyoung 		if (ipforward_rt.ro_rt == NULL) {
   1886  1.218     seanb 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0);
   1887    1.1       cgd 			return;
   1888    1.1       cgd 		}
   1889    1.1       cgd 	}
   1890  1.240     joerg 	rt = ipforward_rt.ro_rt;
   1891    1.1       cgd 
   1892    1.1       cgd 	/*
   1893   1.34   mycroft 	 * Save at most 68 bytes of the packet in case
   1894    1.1       cgd 	 * we need to generate an ICMP message to the src.
   1895  1.119    itojun 	 * Pullup to avoid sharing mbuf cluster between m and mcopy.
   1896    1.1       cgd 	 */
   1897  1.155    itojun 	mcopy = m_copym(m, 0, imin(ntohs(ip->ip_len), 68), M_DONTWAIT);
   1898  1.119    itojun 	if (mcopy)
   1899  1.119    itojun 		mcopy = m_pullup(mcopy, ip->ip_hl << 2);
   1900    1.1       cgd 
                         	/* The saved copy was taken above, before this decrement. */
   1901  1.221  christos 	ip->ip_ttl -= IPTTLDEC;
   1902  1.221  christos 
   1903    1.1       cgd 	/*
   1904    1.1       cgd 	 * If forwarding packet using same interface that it came in on,
   1905    1.1       cgd 	 * perhaps should send a redirect to sender to shortcut a hop.
   1906    1.1       cgd 	 * Only send redirect if source is sending directly to us,
   1907    1.1       cgd 	 * and if packet was not source routed (or has any options).
   1908    1.1       cgd 	 * Also, don't send redirect if forwarding using a default route
   1909    1.1       cgd 	 * or a route modified by a redirect.
   1910    1.1       cgd 	 */
   1911    1.1       cgd 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
   1912    1.1       cgd 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
   1913   1.35   mycroft 	    !in_nullhost(satosin(rt_key(rt))->sin_addr) &&
   1914    1.1       cgd 	    ipsendredirects && !srcrt) {
   1915   1.19   mycroft 		if (rt->rt_ifa &&
   1916   1.19   mycroft 		    (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
   1917   1.19   mycroft 		    ifatoia(rt->rt_ifa)->ia_subnet) {
   1918   1.77   thorpej 			if (rt->rt_flags & RTF_GATEWAY)
   1919   1.77   thorpej 				dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
   1920   1.77   thorpej 			else
   1921   1.77   thorpej 				dest = ip->ip_dst.s_addr;
   1922   1.77   thorpej 			/*
   1923   1.77   thorpej 			 * Router requirements says to only send host
   1924   1.77   thorpej 			 * redirects.
   1925   1.77   thorpej 			 */
   1926   1.77   thorpej 			type = ICMP_REDIRECT;
   1927   1.77   thorpej 			code = ICMP_REDIRECT_HOST;
   1928    1.1       cgd #ifdef DIAGNOSTIC
   1929   1.77   thorpej 			if (ipprintfs)
   1930   1.77   thorpej 				printf("redirect (%d) to %x\n", code,
   1931   1.77   thorpej 				    (u_int32_t)dest);
   1932    1.1       cgd #endif
   1933    1.1       cgd 		}
   1934    1.1       cgd 	}
   1935    1.1       cgd 
                         	/*
                         	 * Hand the packet to ip_output() along the cached route.
                         	 * m is not referenced again after this call.
                         	 */
   1936  1.238    dyoung 	error = ip_output(m, NULL, &ipforward_rt,
   1937  1.173  jonathan 	    (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
   1938  1.174    itojun 	    (struct ip_moptions *)NULL, (struct socket *)NULL);
   1939  1.173  jonathan 
                         	/*
                         	 * Account for the result.  A successful forward with no
                         	 * redirect pending needs no ICMP message: the saved copy is
                         	 * released (after ipflow_create() when GATEWAY is configured
                         	 * and the copy is flagged M_CANFASTFWD) and we are done.
                         	 */
   1940    1.1       cgd 	if (error)
   1941    1.1       cgd 		ipstat.ips_cantforward++;
   1942    1.1       cgd 	else {
   1943    1.1       cgd 		ipstat.ips_forward++;
   1944    1.1       cgd 		if (type)
   1945    1.1       cgd 			ipstat.ips_redirectsent++;
   1946    1.1       cgd 		else {
   1947   1.63      matt 			if (mcopy) {
   1948   1.63      matt #ifdef GATEWAY
   1949   1.64   thorpej 				if (mcopy->m_flags & M_CANFASTFWD)
   1950   1.64   thorpej 					ipflow_create(&ipforward_rt, mcopy);
   1951   1.63      matt #endif
   1952    1.1       cgd 				m_freem(mcopy);
   1953   1.63      matt 			}
   1954    1.1       cgd 			return;
   1955    1.1       cgd 		}
   1956    1.1       cgd 	}
                         	/* Without a saved copy there is nothing to hand to icmp_error(). */
   1957    1.1       cgd 	if (mcopy == NULL)
   1958    1.1       cgd 		return;
   1959   1.13   mycroft 
                         	/* Map the ip_output() result onto an ICMP type and code. */
   1960    1.1       cgd 	switch (error) {
   1961    1.1       cgd 
   1962    1.1       cgd 	case 0:				/* forwarded, but need redirect */
   1963    1.1       cgd 		/* type, code set above */
   1964    1.1       cgd 		break;
   1965    1.1       cgd 
   1966    1.1       cgd 	case ENETUNREACH:		/* shouldn't happen, checked above */
   1967    1.1       cgd 	case EHOSTUNREACH:
   1968    1.1       cgd 	case ENETDOWN:
   1969    1.1       cgd 	case EHOSTDOWN:
   1970    1.1       cgd 	default:
                         		/* every error without its own case is reported this way */
   1971    1.1       cgd 		type = ICMP_UNREACH;
   1972    1.1       cgd 		code = ICMP_UNREACH_HOST;
   1973    1.1       cgd 		break;
   1974    1.1       cgd 
   1975    1.1       cgd 	case EMSGSIZE:
                         		/*
                         		 * "Fragmentation needed": destmtu carries the MTU to
                         		 * report; it stays 0 when no route is cached.
                         		 */
   1976    1.1       cgd 		type = ICMP_UNREACH;
   1977    1.1       cgd 		code = ICMP_UNREACH_NEEDFRAG;
   1978  1.173  jonathan #if !defined(IPSEC) && !defined(FAST_IPSEC)
   1979  1.238    dyoung 		if (ipforward_rt.ro_rt != NULL)
   1980  1.220  christos 			destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
   1981   1.89    itojun #else
   1982   1.89    itojun 		/*
   1983   1.89    itojun 		 * If the packet is routed over IPsec tunnel, tell the
   1984   1.89    itojun 		 * originator the tunnel MTU.
   1985   1.89    itojun 		 *	tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
   1986   1.89    itojun 		 * XXX quickhack!!!
   1987   1.89    itojun 		 */
   1988  1.238    dyoung 		if (ipforward_rt.ro_rt != NULL) {
   1989   1.89    itojun 			struct secpolicy *sp;
   1990   1.89    itojun 			int ipsecerror;
   1991   1.95    itojun 			size_t ipsechdr;
   1992   1.89    itojun 			struct route *ro;
   1993   1.89    itojun 
   1994   1.89    itojun 			sp = ipsec4_getpolicybyaddr(mcopy,
   1995  1.170    itojun 			    IPSEC_DIR_OUTBOUND, IP_FORWARDING,
   1996  1.170    itojun 			    &ipsecerror);
   1997   1.89    itojun 
   1998   1.89    itojun 			if (sp == NULL)
   1999  1.220  christos 				destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
   2000   1.89    itojun 			else {
   2001   1.89    itojun 				/* count IPsec header size */
   2002   1.95    itojun 				ipsechdr = ipsec4_hdrsiz(mcopy,
   2003  1.170    itojun 				    IPSEC_DIR_OUTBOUND, NULL);
   2004   1.89    itojun 
   2005   1.89    itojun 				/*
   2006   1.89    itojun 				 * find the correct route for outer IPv4
   2007   1.89    itojun 				 * header, compute tunnel MTU.
   2008   1.89    itojun 				 */
   2009  1.220  christos 
   2010   1.89    itojun 				if (sp->req != NULL
   2011   1.95    itojun 				 && sp->req->sav != NULL
   2012   1.95    itojun 				 && sp->req->sav->sah != NULL) {
   2013   1.95    itojun 					ro = &sp->req->sav->sah->sa_route;
   2014   1.89    itojun 					if (ro->ro_rt && ro->ro_rt->rt_ifp) {
   2015  1.220  christos 						destmtu =
   2016  1.151    itojun 						    ro->ro_rt->rt_rmx.rmx_mtu ?
   2017  1.151    itojun 						    ro->ro_rt->rt_rmx.rmx_mtu :
   2018   1.89    itojun 						    ro->ro_rt->rt_ifp->if_mtu;
   2019  1.220  christos 						destmtu -= ipsechdr;
   2020   1.89    itojun 					}
   2021   1.89    itojun 				}
   2022   1.89    itojun 
                         				/* release the policy obtained above */
   2023  1.173  jonathan #ifdef	IPSEC
   2024   1.89    itojun 				key_freesp(sp);
   2025  1.173  jonathan #else
   2026  1.173  jonathan 				KEY_FREESP(&sp);
   2027  1.173  jonathan #endif
   2028   1.89    itojun 			}
   2029   1.89    itojun 		}
   2030   1.89    itojun #endif /*IPSEC*/
   2031    1.1       cgd 		ipstat.ips_cantfrag++;
   2032    1.1       cgd 		break;
   2033    1.1       cgd 
   2034    1.1       cgd 	case ENOBUFS:
   2035  1.143    itojun #if 1
   2036  1.143    itojun 		/*
   2037  1.143    itojun 		 * a router should not generate ICMP_SOURCEQUENCH as
   2038  1.143    itojun 		 * required in RFC1812 Requirements for IP Version 4 Routers.
   2039  1.143    itojun 		 * source quench could be a big problem under DoS attacks,
   2040  1.149       wiz 		 * or if the underlying interface is rate-limited.
   2041  1.143    itojun 		 */
   2042  1.143    itojun 		if (mcopy)
   2043  1.143    itojun 			m_freem(mcopy);
   2044  1.143    itojun 		return;
   2045  1.143    itojun #else
   2046    1.1       cgd 		type = ICMP_SOURCEQUENCH;
   2047    1.1       cgd 		code = 0;
   2048    1.1       cgd 		break;
   2049  1.143    itojun #endif
   2050    1.1       cgd 	}
                         	/* Emit the selected ICMP message, built from the saved copy. */
   2051  1.220  christos 	icmp_error(mcopy, type, code, dest, destmtu);
   2052   1.44   thorpej }
   2053   1.44   thorpej 
   2054   1.44   thorpej void
   2055  1.211     perry ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
   2056  1.211     perry     struct mbuf *m)
   2057   1.44   thorpej {
   2058   1.44   thorpej 
   2059   1.44   thorpej 	if (inp->inp_socket->so_options & SO_TIMESTAMP) {
   2060   1.44   thorpej 		struct timeval tv;
   2061   1.44   thorpej 
   2062   1.44   thorpej 		microtime(&tv);
   2063  1.244  christos 		*mp = sbcreatecontrol((void *) &tv, sizeof(tv),
   2064   1.44   thorpej 		    SCM_TIMESTAMP, SOL_SOCKET);
   2065   1.44   thorpej 		if (*mp)
   2066   1.44   thorpej 			mp = &(*mp)->m_next;
   2067   1.44   thorpej 	}
   2068   1.44   thorpej 	if (inp->inp_flags & INP_RECVDSTADDR) {
   2069  1.244  christos 		*mp = sbcreatecontrol((void *) &ip->ip_dst,
   2070   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
   2071   1.44   thorpej 		if (*mp)
   2072   1.44   thorpej 			mp = &(*mp)->m_next;
   2073   1.44   thorpej 	}
   2074   1.44   thorpej #ifdef notyet
   2075   1.44   thorpej 	/*
   2076   1.44   thorpej 	 * XXX
   2077   1.44   thorpej 	 * Moving these out of udp_input() made them even more broken
   2078   1.44   thorpej 	 * than they already were.
   2079   1.44   thorpej 	 *	- fenner (at) parc.xerox.com
   2080   1.44   thorpej 	 */
   2081   1.44   thorpej 	/* options were tossed already */
   2082   1.44   thorpej 	if (inp->inp_flags & INP_RECVOPTS) {
   2083  1.244  christos 		*mp = sbcreatecontrol((void *) opts_deleted_above,
   2084   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
   2085   1.44   thorpej 		if (*mp)
   2086   1.44   thorpej 			mp = &(*mp)->m_next;
   2087   1.44   thorpej 	}
   2088   1.44   thorpej 	/* ip_srcroute doesn't do what we want here, need to fix */
   2089   1.44   thorpej 	if (inp->inp_flags & INP_RECVRETOPTS) {
   2090  1.244  christos 		*mp = sbcreatecontrol((void *) ip_srcroute(),
   2091   1.44   thorpej 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
   2092   1.44   thorpej 		if (*mp)
   2093   1.44   thorpej 			mp = &(*mp)->m_next;
   2094   1.44   thorpej 	}
   2095   1.44   thorpej #endif
   2096   1.44   thorpej 	if (inp->inp_flags & INP_RECVIF) {
   2097   1.44   thorpej 		struct sockaddr_dl sdl;
   2098   1.44   thorpej 
   2099   1.44   thorpej 		sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
   2100   1.44   thorpej 		sdl.sdl_family = AF_LINK;
   2101   1.44   thorpej 		sdl.sdl_index = m->m_pkthdr.rcvif ?
   2102   1.44   thorpej 		    m->m_pkthdr.rcvif->if_index : 0;
   2103   1.44   thorpej 		sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
   2104  1.244  christos 		*mp = sbcreatecontrol((void *) &sdl, sdl.sdl_len,
   2105   1.44   thorpej 		    IP_RECVIF, IPPROTO_IP);
   2106   1.44   thorpej 		if (*mp)
   2107   1.44   thorpej 			mp = &(*mp)->m_next;
   2108   1.44   thorpej 	}
   2109   1.13   mycroft }
   2110   1.13   mycroft 
   2111  1.189    atatat /*
   2112  1.228      elad  * sysctl helper routine for net.inet.ip.forwsrcrt.
   2113  1.228      elad  */
   2114  1.228      elad static int
   2115  1.228      elad sysctl_net_inet_ip_forwsrcrt(SYSCTLFN_ARGS)
   2116  1.228      elad {
   2117  1.228      elad 	int error, tmp;
   2118  1.228      elad 	struct sysctlnode node;
   2119  1.228      elad 
   2120  1.228      elad 	node = *rnode;
   2121  1.228      elad 	tmp = ip_forwsrcrt;
   2122  1.228      elad 	node.sysctl_data = &tmp;
   2123  1.228      elad 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2124  1.228      elad 	if (error || newp == NULL)
   2125  1.228      elad 		return (error);
   2126  1.228      elad 
   2127  1.230      elad 	if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_FORWSRCRT,
   2128  1.232      elad 	    0, NULL, NULL, NULL))
   2129  1.228      elad 		return (EPERM);
   2130  1.228      elad 
   2131  1.228      elad 	ip_forwsrcrt = tmp;
   2132  1.228      elad 
   2133  1.228      elad 	return (0);
   2134  1.228      elad }
   2135  1.228      elad 
   2136  1.228      elad /*
   2137  1.189    atatat  * sysctl helper routine for net.inet.ip.mtudisctimeout.  checks the
   2138  1.189    atatat  * range of the new value and tweaks timers if it changes.
   2139  1.189    atatat  */
   2140  1.189    atatat static int
   2141  1.189    atatat sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS)
   2142   1.13   mycroft {
   2143  1.189    atatat 	int error, tmp;
   2144  1.189    atatat 	struct sysctlnode node;
   2145  1.189    atatat 
   2146  1.189    atatat 	node = *rnode;
   2147  1.189    atatat 	tmp = ip_mtudisc_timeout;
   2148  1.189    atatat 	node.sysctl_data = &tmp;
   2149  1.189    atatat 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2150  1.189    atatat 	if (error || newp == NULL)
   2151  1.189    atatat 		return (error);
   2152  1.189    atatat 	if (tmp < 0)
   2153  1.189    atatat 		return (EINVAL);
   2154   1.52   thorpej 
   2155  1.189    atatat 	ip_mtudisc_timeout = tmp;
   2156  1.189    atatat 	rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout);
   2157  1.189    atatat 
   2158  1.189    atatat 	return (0);
   2159  1.189    atatat }
   2160   1.54     lukem 
   2161   1.65      matt #ifdef GATEWAY
   2162  1.189    atatat /*
   2163  1.247  liamjfoy  * sysctl helper routine for net.inet.ip.maxflows.
   2164  1.189    atatat  */
   2165  1.189    atatat static int
   2166  1.189    atatat sysctl_net_inet_ip_maxflows(SYSCTLFN_ARGS)
   2167  1.189    atatat {
   2168  1.189    atatat 	int s;
   2169   1.67   thorpej 
   2170  1.217    atatat 	s = sysctl_lookup(SYSCTLFN_CALL(rnode));
   2171  1.247  liamjfoy 	if (s || newp == NULL)
   2172  1.189    atatat 		return (s);
   2173  1.212     perry 
   2174  1.189    atatat 	s = splsoftnet();
   2175  1.189    atatat 	ipflow_reap(0);
   2176  1.189    atatat 	splx(s);
   2177  1.144    martin 
   2178  1.189    atatat 	return (0);
   2179  1.189    atatat }
   2180  1.248  liamjfoy 
   2181  1.248  liamjfoy static int
   2182  1.248  liamjfoy sysctl_net_inet_ip_hashsize(SYSCTLFN_ARGS)
   2183  1.248  liamjfoy {
   2184  1.248  liamjfoy 	int error, tmp;
   2185  1.248  liamjfoy 	struct sysctlnode node;
   2186  1.248  liamjfoy 
   2187  1.248  liamjfoy 	node = *rnode;
   2188  1.248  liamjfoy 	tmp = ip_hashsize;
   2189  1.248  liamjfoy 	node.sysctl_data = &tmp;
   2190  1.248  liamjfoy 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
   2191  1.248  liamjfoy 	if (error || newp == NULL)
   2192  1.248  liamjfoy 		return (error);
   2193  1.248  liamjfoy 
   2194  1.248  liamjfoy 	if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
   2195  1.248  liamjfoy 		/*
   2196  1.248  liamjfoy 		 * Can only fail due to malloc()
   2197  1.248  liamjfoy 		 */
   2198  1.248  liamjfoy 		if (ipflow_invalidate_all(tmp))
   2199  1.248  liamjfoy 			return ENOMEM;
   2200  1.248  liamjfoy 	} else {
   2201  1.248  liamjfoy 		/*
   2202  1.248  liamjfoy 		 * EINVAL if not a power of 2
   2203  1.248  liamjfoy 	         */
   2204  1.248  liamjfoy 		return EINVAL;
   2205  1.248  liamjfoy 	}
   2206  1.248  liamjfoy 
   2207  1.248  liamjfoy 	return (0);
   2208  1.248  liamjfoy }
   2209  1.189    atatat #endif /* GATEWAY */
   2210  1.117      tron 
   2211  1.131    itojun 
   2212  1.189    atatat SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup")
   2213  1.189    atatat {
   2214  1.189    atatat 	extern int subnetsarelocal, hostzeroisbroadcast;
   2215  1.180  jonathan 
   2216  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2217  1.197    atatat 		       CTLFLAG_PERMANENT,
   2218  1.189    atatat 		       CTLTYPE_NODE, "net", NULL,
   2219  1.189    atatat 		       NULL, 0, NULL, 0,
   2220  1.189    atatat 		       CTL_NET, CTL_EOL);
   2221  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2222  1.197    atatat 		       CTLFLAG_PERMANENT,
   2223  1.203    atatat 		       CTLTYPE_NODE, "inet",
   2224  1.203    atatat 		       SYSCTL_DESCR("PF_INET related settings"),
   2225  1.189    atatat 		       NULL, 0, NULL, 0,
   2226  1.189    atatat 		       CTL_NET, PF_INET, CTL_EOL);
   2227  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2228  1.197    atatat 		       CTLFLAG_PERMANENT,
   2229  1.203    atatat 		       CTLTYPE_NODE, "ip",
   2230  1.203    atatat 		       SYSCTL_DESCR("IPv4 related settings"),
   2231  1.189    atatat 		       NULL, 0, NULL, 0,
   2232  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
   2233  1.212     perry 
   2234  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2235  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2236  1.203    atatat 		       CTLTYPE_INT, "forwarding",
   2237  1.203    atatat 		       SYSCTL_DESCR("Enable forwarding of INET datagrams"),
   2238  1.189    atatat 		       NULL, 0, &ipforwarding, 0,
   2239  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2240  1.189    atatat 		       IPCTL_FORWARDING, CTL_EOL);
   2241  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2242  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2243  1.203    atatat 		       CTLTYPE_INT, "redirect",
   2244  1.203    atatat 		       SYSCTL_DESCR("Enable sending of ICMP redirect messages"),
   2245  1.189    atatat 		       NULL, 0, &ipsendredirects, 0,
   2246  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2247  1.189    atatat 		       IPCTL_SENDREDIRECTS, CTL_EOL);
   2248  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2249  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2250  1.203    atatat 		       CTLTYPE_INT, "ttl",
   2251  1.203    atatat 		       SYSCTL_DESCR("Default TTL for an INET datagram"),
   2252  1.189    atatat 		       NULL, 0, &ip_defttl, 0,
   2253  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2254  1.189    atatat 		       IPCTL_DEFTTL, CTL_EOL);
   2255  1.189    atatat #ifdef IPCTL_DEFMTU
   2256  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2257  1.197    atatat 		       CTLFLAG_PERMANENT /* |CTLFLAG_READWRITE? */,
   2258  1.203    atatat 		       CTLTYPE_INT, "mtu",
   2259  1.203    atatat 		       SYSCTL_DESCR("Default MTA for an INET route"),
   2260  1.189    atatat 		       NULL, 0, &ip_mtu, 0,
   2261  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2262  1.189    atatat 		       IPCTL_DEFMTU, CTL_EOL);
   2263  1.189    atatat #endif /* IPCTL_DEFMTU */
   2264  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2265  1.228      elad 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2266  1.203    atatat 		       CTLTYPE_INT, "forwsrcrt",
   2267  1.203    atatat 		       SYSCTL_DESCR("Enable forwarding of source-routed "
   2268  1.203    atatat 				    "datagrams"),
   2269  1.228      elad 		       sysctl_net_inet_ip_forwsrcrt, 0, &ip_forwsrcrt, 0,
   2270  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2271  1.189    atatat 		       IPCTL_FORWSRCRT, CTL_EOL);
   2272  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2273  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2274  1.203    atatat 		       CTLTYPE_INT, "directed-broadcast",
   2275  1.203    atatat 		       SYSCTL_DESCR("Enable forwarding of broadcast datagrams"),
   2276  1.189    atatat 		       NULL, 0, &ip_directedbcast, 0,
   2277  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2278  1.189    atatat 		       IPCTL_DIRECTEDBCAST, CTL_EOL);
   2279  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2280  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2281  1.203    atatat 		       CTLTYPE_INT, "allowsrcrt",
   2282  1.203    atatat 		       SYSCTL_DESCR("Accept source-routed datagrams"),
   2283  1.189    atatat 		       NULL, 0, &ip_allowsrcrt, 0,
   2284  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2285  1.189    atatat 		       IPCTL_ALLOWSRCRT, CTL_EOL);
   2286  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2287  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2288  1.203    atatat 		       CTLTYPE_INT, "subnetsarelocal",
   2289  1.203    atatat 		       SYSCTL_DESCR("Whether logical subnets are considered "
   2290  1.203    atatat 				    "local"),
   2291  1.189    atatat 		       NULL, 0, &subnetsarelocal, 0,
   2292  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2293  1.189    atatat 		       IPCTL_SUBNETSARELOCAL, CTL_EOL);
   2294  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2295  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2296  1.203    atatat 		       CTLTYPE_INT, "mtudisc",
   2297  1.203    atatat 		       SYSCTL_DESCR("Use RFC1191 Path MTU Discovery"),
   2298  1.189    atatat 		       NULL, 0, &ip_mtudisc, 0,
   2299  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2300  1.189    atatat 		       IPCTL_MTUDISC, CTL_EOL);
   2301  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2302  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2303  1.203    atatat 		       CTLTYPE_INT, "anonportmin",
   2304  1.203    atatat 		       SYSCTL_DESCR("Lowest ephemeral port number to assign"),
   2305  1.189    atatat 		       sysctl_net_inet_ip_ports, 0, &anonportmin, 0,
   2306  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2307  1.189    atatat 		       IPCTL_ANONPORTMIN, CTL_EOL);
   2308  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2309  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2310  1.203    atatat 		       CTLTYPE_INT, "anonportmax",
   2311  1.203    atatat 		       SYSCTL_DESCR("Highest ephemeral port number to assign"),
   2312  1.189    atatat 		       sysctl_net_inet_ip_ports, 0, &anonportmax, 0,
   2313  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2314  1.189    atatat 		       IPCTL_ANONPORTMAX, CTL_EOL);
   2315  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2316  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2317  1.203    atatat 		       CTLTYPE_INT, "mtudisctimeout",
   2318  1.203    atatat 		       SYSCTL_DESCR("Lifetime of a Path MTU Discovered route"),
   2319  1.189    atatat 		       sysctl_net_inet_ip_pmtudto, 0, &ip_mtudisc_timeout, 0,
   2320  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2321  1.189    atatat 		       IPCTL_MTUDISCTIMEOUT, CTL_EOL);
   2322  1.189    atatat #ifdef GATEWAY
   2323  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2324  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2325  1.203    atatat 		       CTLTYPE_INT, "maxflows",
   2326  1.203    atatat 		       SYSCTL_DESCR("Number of flows for fast forwarding"),
   2327  1.189    atatat 		       sysctl_net_inet_ip_maxflows, 0, &ip_maxflows, 0,
   2328  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2329  1.189    atatat 		       IPCTL_MAXFLOWS, CTL_EOL);
   2330  1.248  liamjfoy 	sysctl_createv(clog, 0, NULL, NULL,
   2331  1.248  liamjfoy 			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2332  1.248  liamjfoy 			CTLTYPE_INT, "hashsize",
   2333  1.248  liamjfoy 			SYSCTL_DESCR("Size of hash table for fast forwarding (IPv4)"),
   2334  1.248  liamjfoy 			sysctl_net_inet_ip_hashsize, 0, &ip_hashsize, 0,
   2335  1.248  liamjfoy 			CTL_NET, PF_INET, IPPROTO_IP,
   2336  1.248  liamjfoy 			CTL_CREATE, CTL_EOL);
   2337  1.189    atatat #endif /* GATEWAY */
   2338  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2339  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2340  1.203    atatat 		       CTLTYPE_INT, "hostzerobroadcast",
   2341  1.203    atatat 		       SYSCTL_DESCR("All zeroes address is broadcast address"),
   2342  1.189    atatat 		       NULL, 0, &hostzeroisbroadcast, 0,
   2343  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2344  1.189    atatat 		       IPCTL_HOSTZEROBROADCAST, CTL_EOL);
   2345  1.189    atatat #if NGIF > 0
   2346  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2347  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2348  1.203    atatat 		       CTLTYPE_INT, "gifttl",
   2349  1.203    atatat 		       SYSCTL_DESCR("Default TTL for a gif tunnel datagram"),
   2350  1.189    atatat 		       NULL, 0, &ip_gif_ttl, 0,
   2351  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2352  1.189    atatat 		       IPCTL_GIF_TTL, CTL_EOL);
   2353  1.189    atatat #endif /* NGIF */
   2354  1.189    atatat #ifndef IPNOPRIVPORTS
   2355  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2356  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2357  1.203    atatat 		       CTLTYPE_INT, "lowportmin",
   2358  1.203    atatat 		       SYSCTL_DESCR("Lowest privileged ephemeral port number "
   2359  1.203    atatat 				    "to assign"),
   2360  1.189    atatat 		       sysctl_net_inet_ip_ports, 0, &lowportmin, 0,
   2361  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2362  1.189    atatat 		       IPCTL_LOWPORTMIN, CTL_EOL);
   2363  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2364  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2365  1.203    atatat 		       CTLTYPE_INT, "lowportmax",
   2366  1.203    atatat 		       SYSCTL_DESCR("Highest privileged ephemeral port number "
   2367  1.203    atatat 				    "to assign"),
   2368  1.189    atatat 		       sysctl_net_inet_ip_ports, 0, &lowportmax, 0,
   2369  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2370  1.189    atatat 		       IPCTL_LOWPORTMAX, CTL_EOL);
   2371  1.189    atatat #endif /* IPNOPRIVPORTS */
   2372  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2373  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2374  1.203    atatat 		       CTLTYPE_INT, "maxfragpackets",
   2375  1.203    atatat 		       SYSCTL_DESCR("Maximum number of fragments to retain for "
   2376  1.203    atatat 				    "possible reassembly"),
   2377  1.189    atatat 		       NULL, 0, &ip_maxfragpackets, 0,
   2378  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2379  1.189    atatat 		       IPCTL_MAXFRAGPACKETS, CTL_EOL);
   2380  1.189    atatat #if NGRE > 0
   2381  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2382  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2383  1.203    atatat 		       CTLTYPE_INT, "grettl",
   2384  1.203    atatat 		       SYSCTL_DESCR("Default TTL for a gre tunnel datagram"),
   2385  1.189    atatat 		       NULL, 0, &ip_gre_ttl, 0,
   2386  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2387  1.189    atatat 		       IPCTL_GRE_TTL, CTL_EOL);
   2388  1.189    atatat #endif /* NGRE */
   2389  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2390  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2391  1.203    atatat 		       CTLTYPE_INT, "checkinterface",
   2392  1.203    atatat 		       SYSCTL_DESCR("Enable receive side of Strong ES model "
   2393  1.203    atatat 				    "from RFC1122"),
   2394  1.189    atatat 		       NULL, 0, &ip_checkinterface, 0,
   2395  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2396  1.189    atatat 		       IPCTL_CHECKINTERFACE, CTL_EOL);
   2397  1.197    atatat 	sysctl_createv(clog, 0, NULL, NULL,
   2398  1.197    atatat 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2399  1.203    atatat 		       CTLTYPE_INT, "random_id",
   2400  1.203    atatat 		       SYSCTL_DESCR("Assign random ip_id values"),
   2401  1.189    atatat 		       NULL, 0, &ip_do_randomid, 0,
   2402  1.189    atatat 		       CTL_NET, PF_INET, IPPROTO_IP,
   2403  1.189    atatat 		       IPCTL_RANDOMID, CTL_EOL);
   2404  1.206   thorpej 	sysctl_createv(clog, 0, NULL, NULL,
   2405  1.206   thorpej 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
   2406  1.206   thorpej 		       CTLTYPE_INT, "do_loopback_cksum",
   2407  1.206   thorpej 		       SYSCTL_DESCR("Perform IP checksum on loopback"),
   2408  1.206   thorpej 		       NULL, 0, &ip_do_loopback_cksum, 0,
   2409  1.206   thorpej 		       CTL_NET, PF_INET, IPPROTO_IP,
   2410  1.206   thorpej 		       IPCTL_LOOPBACKCKSUM, CTL_EOL);
   2411  1.219      elad 	sysctl_createv(clog, 0, NULL, NULL,
   2412  1.219      elad 		       CTLFLAG_PERMANENT,
   2413  1.219      elad 		       CTLTYPE_STRUCT, "stats",
   2414  1.219      elad 		       SYSCTL_DESCR("IP statistics"),
   2415  1.219      elad 		       NULL, 0, &ipstat, sizeof(ipstat),
   2416  1.219      elad 		       CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS,
   2417  1.219      elad 		       CTL_EOL);
   2418    1.1       cgd }
   2419