ip_input.c revision 1.250.6.2 1 1.250.6.2 dyoung /* $NetBSD: ip_input.c,v 1.250.6.2 2007/07/19 20:48:56 dyoung Exp $ */
2 1.250.6.2 dyoung
3 1.250.6.2 dyoung /*
4 1.250.6.2 dyoung * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 1.250.6.2 dyoung * All rights reserved.
6 1.250.6.2 dyoung *
7 1.250.6.2 dyoung * Redistribution and use in source and binary forms, with or without
8 1.250.6.2 dyoung * modification, are permitted provided that the following conditions
9 1.250.6.2 dyoung * are met:
10 1.250.6.2 dyoung * 1. Redistributions of source code must retain the above copyright
11 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer.
12 1.250.6.2 dyoung * 2. Redistributions in binary form must reproduce the above copyright
13 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer in the
14 1.250.6.2 dyoung * documentation and/or other materials provided with the distribution.
15 1.250.6.2 dyoung * 3. Neither the name of the project nor the names of its contributors
16 1.250.6.2 dyoung * may be used to endorse or promote products derived from this software
17 1.250.6.2 dyoung * without specific prior written permission.
18 1.250.6.2 dyoung *
19 1.250.6.2 dyoung * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 1.250.6.2 dyoung * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.250.6.2 dyoung * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.250.6.2 dyoung * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 1.250.6.2 dyoung * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.250.6.2 dyoung * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.250.6.2 dyoung * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.250.6.2 dyoung * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.250.6.2 dyoung * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.250.6.2 dyoung * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.250.6.2 dyoung * SUCH DAMAGE.
30 1.250.6.2 dyoung */
31 1.250.6.2 dyoung
32 1.250.6.2 dyoung /*-
33 1.250.6.2 dyoung * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 1.250.6.2 dyoung * All rights reserved.
35 1.250.6.2 dyoung *
36 1.250.6.2 dyoung * This code is derived from software contributed to The NetBSD Foundation
37 1.250.6.2 dyoung * by Public Access Networks Corporation ("Panix"). It was developed under
38 1.250.6.2 dyoung * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 1.250.6.2 dyoung *
40 1.250.6.2 dyoung * Redistribution and use in source and binary forms, with or without
41 1.250.6.2 dyoung * modification, are permitted provided that the following conditions
42 1.250.6.2 dyoung * are met:
43 1.250.6.2 dyoung * 1. Redistributions of source code must retain the above copyright
44 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer.
45 1.250.6.2 dyoung * 2. Redistributions in binary form must reproduce the above copyright
46 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer in the
47 1.250.6.2 dyoung * documentation and/or other materials provided with the distribution.
48 1.250.6.2 dyoung * 3. All advertising materials mentioning features or use of this software
49 1.250.6.2 dyoung * must display the following acknowledgement:
50 1.250.6.2 dyoung * This product includes software developed by the NetBSD
51 1.250.6.2 dyoung * Foundation, Inc. and its contributors.
52 1.250.6.2 dyoung * 4. Neither the name of The NetBSD Foundation nor the names of its
53 1.250.6.2 dyoung * contributors may be used to endorse or promote products derived
54 1.250.6.2 dyoung * from this software without specific prior written permission.
55 1.250.6.2 dyoung *
56 1.250.6.2 dyoung * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 1.250.6.2 dyoung * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 1.250.6.2 dyoung * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 1.250.6.2 dyoung * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 1.250.6.2 dyoung * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 1.250.6.2 dyoung * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 1.250.6.2 dyoung * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 1.250.6.2 dyoung * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 1.250.6.2 dyoung * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 1.250.6.2 dyoung * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 1.250.6.2 dyoung * POSSIBILITY OF SUCH DAMAGE.
67 1.250.6.2 dyoung */
68 1.250.6.2 dyoung
69 1.250.6.2 dyoung /*
70 1.250.6.2 dyoung * Copyright (c) 1982, 1986, 1988, 1993
71 1.250.6.2 dyoung * The Regents of the University of California. All rights reserved.
72 1.250.6.2 dyoung *
73 1.250.6.2 dyoung * Redistribution and use in source and binary forms, with or without
74 1.250.6.2 dyoung * modification, are permitted provided that the following conditions
75 1.250.6.2 dyoung * are met:
76 1.250.6.2 dyoung * 1. Redistributions of source code must retain the above copyright
77 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer.
78 1.250.6.2 dyoung * 2. Redistributions in binary form must reproduce the above copyright
79 1.250.6.2 dyoung * notice, this list of conditions and the following disclaimer in the
80 1.250.6.2 dyoung * documentation and/or other materials provided with the distribution.
81 1.250.6.2 dyoung * 3. Neither the name of the University nor the names of its contributors
82 1.250.6.2 dyoung * may be used to endorse or promote products derived from this software
83 1.250.6.2 dyoung * without specific prior written permission.
84 1.250.6.2 dyoung *
85 1.250.6.2 dyoung * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
86 1.250.6.2 dyoung * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
87 1.250.6.2 dyoung * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
88 1.250.6.2 dyoung * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
89 1.250.6.2 dyoung * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
90 1.250.6.2 dyoung * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
91 1.250.6.2 dyoung * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
92 1.250.6.2 dyoung * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
93 1.250.6.2 dyoung * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
94 1.250.6.2 dyoung * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
95 1.250.6.2 dyoung * SUCH DAMAGE.
96 1.250.6.2 dyoung *
97 1.250.6.2 dyoung * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
98 1.250.6.2 dyoung */
99 1.250.6.2 dyoung
100 1.250.6.2 dyoung #include <sys/cdefs.h>
101 1.250.6.2 dyoung __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.250.6.2 2007/07/19 20:48:56 dyoung Exp $");
102 1.250.6.2 dyoung
103 1.250.6.2 dyoung #include "opt_inet.h"
104 1.250.6.2 dyoung #include "opt_gateway.h"
105 1.250.6.2 dyoung #include "opt_pfil_hooks.h"
106 1.250.6.2 dyoung #include "opt_ipsec.h"
107 1.250.6.2 dyoung #include "opt_mrouting.h"
108 1.250.6.2 dyoung #include "opt_mbuftrace.h"
109 1.250.6.2 dyoung #include "opt_inet_csum.h"
110 1.250.6.2 dyoung
111 1.250.6.2 dyoung #include <sys/param.h>
112 1.250.6.2 dyoung #include <sys/systm.h>
113 1.250.6.2 dyoung #include <sys/malloc.h>
114 1.250.6.2 dyoung #include <sys/mbuf.h>
115 1.250.6.2 dyoung #include <sys/domain.h>
116 1.250.6.2 dyoung #include <sys/protosw.h>
117 1.250.6.2 dyoung #include <sys/socket.h>
118 1.250.6.2 dyoung #include <sys/socketvar.h>
119 1.250.6.2 dyoung #include <sys/errno.h>
120 1.250.6.2 dyoung #include <sys/time.h>
121 1.250.6.2 dyoung #include <sys/kernel.h>
122 1.250.6.2 dyoung #include <sys/pool.h>
123 1.250.6.2 dyoung #include <sys/sysctl.h>
124 1.250.6.2 dyoung #include <sys/kauth.h>
125 1.250.6.2 dyoung
126 1.250.6.2 dyoung #include <net/if.h>
127 1.250.6.2 dyoung #include <net/if_dl.h>
128 1.250.6.2 dyoung #include <net/route.h>
129 1.250.6.2 dyoung #include <net/pfil.h>
130 1.250.6.2 dyoung
131 1.250.6.2 dyoung #include <netinet/in.h>
132 1.250.6.2 dyoung #include <netinet/in_systm.h>
133 1.250.6.2 dyoung #include <netinet/ip.h>
134 1.250.6.2 dyoung #include <netinet/in_pcb.h>
135 1.250.6.2 dyoung #include <netinet/in_proto.h>
136 1.250.6.2 dyoung #include <netinet/in_var.h>
137 1.250.6.2 dyoung #include <netinet/ip_var.h>
138 1.250.6.2 dyoung #include <netinet/ip_icmp.h>
139 1.250.6.2 dyoung /* just for gif_ttl */
140 1.250.6.2 dyoung #include <netinet/in_gif.h>
141 1.250.6.2 dyoung #include "gif.h"
142 1.250.6.2 dyoung #include <net/if_gre.h>
143 1.250.6.2 dyoung #include "gre.h"
144 1.250.6.2 dyoung
145 1.250.6.2 dyoung #ifdef MROUTING
146 1.250.6.2 dyoung #include <netinet/ip_mroute.h>
147 1.250.6.2 dyoung #endif
148 1.250.6.2 dyoung
149 1.250.6.2 dyoung #ifdef IPSEC
150 1.250.6.2 dyoung #include <netinet6/ipsec.h>
151 1.250.6.2 dyoung #include <netkey/key.h>
152 1.250.6.2 dyoung #endif
153 1.250.6.2 dyoung #ifdef FAST_IPSEC
154 1.250.6.2 dyoung #include <netipsec/ipsec.h>
155 1.250.6.2 dyoung #include <netipsec/key.h>
156 1.250.6.2 dyoung #endif /* FAST_IPSEC*/
157 1.250.6.2 dyoung
/*
 * Compile-time defaults for the IP tunables.  Every macro below is only
 * given a value when it has not already been defined.
 */

/* Forwarding defaults to on only when GATEWAY is defined. */
#if !defined(IPFORWARDING)
# if defined(GATEWAY)
#  define IPFORWARDING		1	/* forward IP packets not for us */
# else
#  define IPFORWARDING		0	/* don't forward IP packets not for us */
# endif /* GATEWAY */
#endif /* IPFORWARDING */

#if !defined(IPSENDREDIRECTS)
# define IPSENDREDIRECTS	1
#endif

#if !defined(IPFORWSRCRT)
# define IPFORWSRCRT		1	/* forward source-routed packets */
#endif

#if !defined(IPALLOWSRCRT)
# define IPALLOWSRCRT		1	/* allow source-routed packets */
#endif

#if !defined(IPMTUDISC)
# define IPMTUDISC		1
#endif

#if !defined(IPMTUDISCTIMEOUT)
# define IPMTUDISCTIMEOUT	(10 * 60)	/* as per RFC 1191 */
#endif

/*
 * Note: the older DIRECTED_BROADCAST option is mapped onto
 * IPDIRECTEDBCAST here so that configurations which still use it
 * will Just Work.
 */
#if !defined(IPDIRECTEDBCAST)
# if defined(DIRECTED_BROADCAST)
#  define IPDIRECTEDBCAST	1
# else
#  define IPDIRECTEDBCAST	0
# endif /* DIRECTED_BROADCAST */
#endif /* IPDIRECTEDBCAST */
192 1.250.6.2 dyoung int ipforwarding = IPFORWARDING;
193 1.250.6.2 dyoung int ipsendredirects = IPSENDREDIRECTS;
194 1.250.6.2 dyoung int ip_defttl = IPDEFTTL;
195 1.250.6.2 dyoung int ip_forwsrcrt = IPFORWSRCRT;
196 1.250.6.2 dyoung int ip_directedbcast = IPDIRECTEDBCAST;
197 1.250.6.2 dyoung int ip_allowsrcrt = IPALLOWSRCRT;
198 1.250.6.2 dyoung int ip_mtudisc = IPMTUDISC;
199 1.250.6.2 dyoung int ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
200 1.250.6.2 dyoung #ifdef DIAGNOSTIC
201 1.250.6.2 dyoung int ipprintfs = 0;
202 1.250.6.2 dyoung #endif
203 1.250.6.2 dyoung
204 1.250.6.2 dyoung int ip_do_randomid = 0;
205 1.250.6.2 dyoung
/*
 * Caveats for ip_checkinterface (off by default):
 *
 * XXX - Turning it on mostly gives the receive side of the Strong ES
 *	 model from RFC 1122.  The routing table and the transmit path
 *	 do not follow that model, so a value of 1 yields an odd hybrid.
 *
 * XXX - It currently has to stay disabled when ipnat is used to
 *	 translate the destination address to another local interface.
 *
 * XXX - It has to stay disabled when IP aliases are added to the
 *	 loopback interface rather than to the interface on which the
 *	 packets for those addresses are received.
 */
int	ip_checkinterface = 0;
220 1.250.6.2 dyoung
221 1.250.6.2 dyoung
222 1.250.6.2 dyoung struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
223 1.250.6.2 dyoung
224 1.250.6.2 dyoung int ipqmaxlen = IFQ_MAXLEN;
225 1.250.6.2 dyoung u_long in_ifaddrhash; /* size of hash table - 1 */
226 1.250.6.2 dyoung int in_ifaddrentries; /* total number of addrs */
227 1.250.6.2 dyoung struct in_ifaddrhead in_ifaddrhead;
228 1.250.6.2 dyoung struct in_ifaddrhashhead *in_ifaddrhashtbl;
229 1.250.6.2 dyoung u_long in_multihash; /* size of hash table - 1 */
230 1.250.6.2 dyoung int in_multientries; /* total number of addrs */
231 1.250.6.2 dyoung struct in_multihashhead *in_multihashtbl;
232 1.250.6.2 dyoung struct ifqueue ipintrq;
233 1.250.6.2 dyoung struct ipstat ipstat;
234 1.250.6.2 dyoung uint16_t ip_id;
235 1.250.6.2 dyoung
236 1.250.6.2 dyoung #ifdef PFIL_HOOKS
237 1.250.6.2 dyoung struct pfil_head inet_pfil_hook;
238 1.250.6.2 dyoung #endif
239 1.250.6.2 dyoung
/*
 * Snapshot of nmbclusters.  When the live nmbclusters value no longer
 * matches the snapshot, the IP parameters derived from nmbclusters are
 * recalculated.
 */
static int	ip_nmbclusters;			/* copy of nmbclusters */
static void	ip_nmbclusters_changed(void);	/* recalc limits */

/*
 * Re-derive the limits if nmbclusters has changed since the snapshot
 * was taken; the mismatch is flagged as the unlikely case.
 */
#define	CHECK_NMBCLUSTER_PARAMS()					\
do {									\
	if (__predict_false(ip_nmbclusters != nmbclusters)) {		\
		ip_nmbclusters_changed();				\
	}								\
} while (/*CONSTCOND*/0)
252 1.250.6.2 dyoung
253 1.250.6.2 dyoung /* IP datagram reassembly queues (hashed) */
254 1.250.6.2 dyoung #define IPREASS_NHASH_LOG2 6
255 1.250.6.2 dyoung #define IPREASS_NHASH (1 << IPREASS_NHASH_LOG2)
256 1.250.6.2 dyoung #define IPREASS_HMASK (IPREASS_NHASH - 1)
257 1.250.6.2 dyoung #define IPREASS_HASH(x,y) \
258 1.250.6.2 dyoung (((((x) & 0xF) | ((((x) >> 8) & 0xF) << 4)) ^ (y)) & IPREASS_HMASK)
259 1.250.6.2 dyoung struct ipqhead ipq[IPREASS_NHASH];
260 1.250.6.2 dyoung int ipq_locked;
261 1.250.6.2 dyoung static int ip_nfragpackets; /* packets in reass queue */
262 1.250.6.2 dyoung static int ip_nfrags; /* total fragments in reass queues */
263 1.250.6.2 dyoung
264 1.250.6.2 dyoung int ip_maxfragpackets = 200; /* limit on packets. XXX sysctl */
265 1.250.6.2 dyoung int ip_maxfrags; /* limit on fragments. XXX sysctl */
266 1.250.6.2 dyoung
267 1.250.6.2 dyoung
268 1.250.6.2 dyoung /*
269 1.250.6.2 dyoung * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for
270 1.250.6.2 dyoung * IP reassembly queue buffer management.
271 1.250.6.2 dyoung *
272 1.250.6.2 dyoung * We keep a count of total IP fragments (NB: not fragmented packets!)
273 1.250.6.2 dyoung * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
274 1.250.6.2 dyoung * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the
275 1.250.6.2 dyoung * total fragments in reassembly queues. This AIMD policy avoids
276 1.250.6.2 dyoung * repeatedly deleting single packets under heavy fragmentation load
277 1.250.6.2 dyoung * (e.g., from lossy NFS peers).
278 1.250.6.2 dyoung */
279 1.250.6.2 dyoung static u_int ip_reass_ttl_decr(u_int ticks);
280 1.250.6.2 dyoung static void ip_reass_drophalf(void);
281 1.250.6.2 dyoung
282 1.250.6.2 dyoung
283 1.250.6.2 dyoung static inline int ipq_lock_try(void);
284 1.250.6.2 dyoung static inline void ipq_unlock(void);
285 1.250.6.2 dyoung
286 1.250.6.2 dyoung static inline int
287 1.250.6.2 dyoung ipq_lock_try(void)
288 1.250.6.2 dyoung {
289 1.250.6.2 dyoung int s;
290 1.250.6.2 dyoung
291 1.250.6.2 dyoung /*
292 1.250.6.2 dyoung * Use splvm() -- we're blocking things that would cause
293 1.250.6.2 dyoung * mbuf allocation.
294 1.250.6.2 dyoung */
295 1.250.6.2 dyoung s = splvm();
296 1.250.6.2 dyoung if (ipq_locked) {
297 1.250.6.2 dyoung splx(s);
298 1.250.6.2 dyoung return (0);
299 1.250.6.2 dyoung }
300 1.250.6.2 dyoung ipq_locked = 1;
301 1.250.6.2 dyoung splx(s);
302 1.250.6.2 dyoung return (1);
303 1.250.6.2 dyoung }
304 1.250.6.2 dyoung
305 1.250.6.2 dyoung static inline void
306 1.250.6.2 dyoung ipq_unlock(void)
307 1.250.6.2 dyoung {
308 1.250.6.2 dyoung int s;
309 1.250.6.2 dyoung
310 1.250.6.2 dyoung s = splvm();
311 1.250.6.2 dyoung ipq_locked = 0;
312 1.250.6.2 dyoung splx(s);
313 1.250.6.2 dyoung }
314 1.250.6.2 dyoung
/*
 * IPQ_LOCK() takes the reassembly-queue lock and IPQ_LOCK_CHECK()
 * verifies that it is held.  With DIAGNOSTIC, a failed acquire or a
 * failed check prints the file and line and panics; without it, the
 * result of the acquire is discarded and the check expands to nothing.
 */
#ifndef DIAGNOSTIC

#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */

#else /* DIAGNOSTIC */

#define	IPQ_LOCK()							\
do {									\
	if (!ipq_lock_try()) {						\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)

#define	IPQ_LOCK_CHECK()						\
do {									\
	if (!ipq_locked) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)

#endif /* DIAGNOSTIC */

#define	IPQ_UNLOCK()		ipq_unlock()
336 1.250.6.2 dyoung
337 1.250.6.2 dyoung POOL_INIT(inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", NULL,
338 1.250.6.2 dyoung IPL_SOFTNET);
339 1.250.6.2 dyoung POOL_INIT(ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", NULL,
340 1.250.6.2 dyoung IPL_VM);
341 1.250.6.2 dyoung
/*
 * Optional event counters for IP header checksum handling.  When
 * INET_CSUM_COUNTERS is not defined, INET_CSUM_COUNTER_INCR() expands
 * to nothing.
 */
#if defined(INET_CSUM_COUNTERS)

#include <sys/device.h>

struct evcnt ip_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "hwcsum bad");
struct evcnt ip_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "hwcsum ok");
struct evcnt ip_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "swcsum");

/* Bump the ev_count member of the counter that ev points to. */
#define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++

EVCNT_ATTACH_STATIC(ip_hwcsum_bad);
EVCNT_ATTACH_STATIC(ip_hwcsum_ok);
EVCNT_ATTACH_STATIC(ip_swcsum);

#else /* !INET_CSUM_COUNTERS */

#define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */

#endif /* INET_CSUM_COUNTERS */
363 1.250.6.2 dyoung
364 1.250.6.2 dyoung /*
365 1.250.6.2 dyoung * We need to save the IP options in case a protocol wants to respond
366 1.250.6.2 dyoung * to an incoming packet over the same route if the packet got here
367 1.250.6.2 dyoung * using IP source routing. This allows connection establishment and
368 1.250.6.2 dyoung * maintenance when the remote end is on a network that is not known
369 1.250.6.2 dyoung * to us.
370 1.250.6.2 dyoung */
371 1.250.6.2 dyoung int ip_nhops = 0;
372 1.250.6.2 dyoung static struct ip_srcrt {
373 1.250.6.2 dyoung struct in_addr dst; /* final destination */
374 1.250.6.2 dyoung char nop; /* one NOP to align */
375 1.250.6.2 dyoung char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
376 1.250.6.2 dyoung struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
377 1.250.6.2 dyoung } ip_srcrt;
378 1.250.6.2 dyoung
379 1.250.6.2 dyoung static void save_rte(u_char *, struct in_addr);
380 1.250.6.2 dyoung
#if defined(MBUFTRACE)
/* mbuf owner records ("internet" rx / tx); attached in ip_init(). */
struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx");
struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx");
#endif /* MBUFTRACE */
385 1.250.6.2 dyoung
386 1.250.6.2 dyoung /*
387 1.250.6.2 dyoung * Compute IP limits derived from the value of nmbclusters.
388 1.250.6.2 dyoung */
389 1.250.6.2 dyoung static void
390 1.250.6.2 dyoung ip_nmbclusters_changed(void)
391 1.250.6.2 dyoung {
392 1.250.6.2 dyoung ip_maxfrags = nmbclusters / 4;
393 1.250.6.2 dyoung ip_nmbclusters = nmbclusters;
394 1.250.6.2 dyoung }
395 1.250.6.2 dyoung
396 1.250.6.2 dyoung /*
397 1.250.6.2 dyoung * IP initialization: fill in IP protocol switch table.
398 1.250.6.2 dyoung * All protocols not implemented in kernel go to raw IP protocol handler.
399 1.250.6.2 dyoung */
400 1.250.6.2 dyoung void
401 1.250.6.2 dyoung ip_init(void)
402 1.250.6.2 dyoung {
403 1.250.6.2 dyoung const struct protosw *pr;
404 1.250.6.2 dyoung int i;
405 1.250.6.2 dyoung
406 1.250.6.2 dyoung pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
407 1.250.6.2 dyoung if (pr == 0)
408 1.250.6.2 dyoung panic("ip_init");
409 1.250.6.2 dyoung for (i = 0; i < IPPROTO_MAX; i++)
410 1.250.6.2 dyoung ip_protox[i] = pr - inetsw;
411 1.250.6.2 dyoung for (pr = inetdomain.dom_protosw;
412 1.250.6.2 dyoung pr < inetdomain.dom_protoswNPROTOSW; pr++)
413 1.250.6.2 dyoung if (pr->pr_domain->dom_family == PF_INET &&
414 1.250.6.2 dyoung pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
415 1.250.6.2 dyoung ip_protox[pr->pr_protocol] = pr - inetsw;
416 1.250.6.2 dyoung
417 1.250.6.2 dyoung for (i = 0; i < IPREASS_NHASH; i++)
418 1.250.6.2 dyoung LIST_INIT(&ipq[i]);
419 1.250.6.2 dyoung
420 1.250.6.2 dyoung ip_id = time_second & 0xfffff;
421 1.250.6.2 dyoung
422 1.250.6.2 dyoung ipintrq.ifq_maxlen = ipqmaxlen;
423 1.250.6.2 dyoung ip_nmbclusters_changed();
424 1.250.6.2 dyoung
425 1.250.6.2 dyoung TAILQ_INIT(&in_ifaddrhead);
426 1.250.6.2 dyoung in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
427 1.250.6.2 dyoung M_WAITOK, &in_ifaddrhash);
428 1.250.6.2 dyoung in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
429 1.250.6.2 dyoung M_WAITOK, &in_multihash);
430 1.250.6.2 dyoung ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
431 1.250.6.2 dyoung #ifdef GATEWAY
432 1.250.6.2 dyoung ipflow_init(ip_hashsize);
433 1.250.6.2 dyoung #endif
434 1.250.6.2 dyoung
435 1.250.6.2 dyoung #ifdef PFIL_HOOKS
436 1.250.6.2 dyoung /* Register our Packet Filter hook. */
437 1.250.6.2 dyoung inet_pfil_hook.ph_type = PFIL_TYPE_AF;
438 1.250.6.2 dyoung inet_pfil_hook.ph_af = AF_INET;
439 1.250.6.2 dyoung i = pfil_head_register(&inet_pfil_hook);
440 1.250.6.2 dyoung if (i != 0)
441 1.250.6.2 dyoung printf("ip_init: WARNING: unable to register pfil hook, "
442 1.250.6.2 dyoung "error %d\n", i);
443 1.250.6.2 dyoung #endif /* PFIL_HOOKS */
444 1.250.6.2 dyoung
445 1.250.6.2 dyoung #ifdef MBUFTRACE
446 1.250.6.2 dyoung MOWNER_ATTACH(&ip_tx_mowner);
447 1.250.6.2 dyoung MOWNER_ATTACH(&ip_rx_mowner);
448 1.250.6.2 dyoung #endif /* MBUFTRACE */
449 1.250.6.2 dyoung }
450 1.250.6.2 dyoung
451 1.250.6.2 dyoung struct sockaddr_in ipaddr = {
452 1.250.6.2 dyoung .sin_len = sizeof(ipaddr),
453 1.250.6.2 dyoung .sin_family = AF_INET,
454 1.250.6.2 dyoung };
455 1.250.6.2 dyoung struct route ipforward_rt;
456 1.250.6.2 dyoung
457 1.250.6.2 dyoung /*
458 1.250.6.2 dyoung * IP software interrupt routine
459 1.250.6.2 dyoung */
460 1.250.6.2 dyoung void
461 1.250.6.2 dyoung ipintr(void)
462 1.250.6.2 dyoung {
463 1.250.6.2 dyoung int s;
464 1.250.6.2 dyoung struct mbuf *m;
465 1.250.6.2 dyoung
466 1.250.6.2 dyoung while (!IF_IS_EMPTY(&ipintrq)) {
467 1.250.6.2 dyoung s = splnet();
468 1.250.6.2 dyoung IF_DEQUEUE(&ipintrq, m);
469 1.250.6.2 dyoung splx(s);
470 1.250.6.2 dyoung if (m == 0)
471 1.250.6.2 dyoung return;
472 1.250.6.2 dyoung MCLAIM(m, &ip_rx_mowner);
473 1.250.6.2 dyoung ip_input(m);
474 1.250.6.2 dyoung }
475 1.250.6.2 dyoung }
476 1.250.6.2 dyoung
477 1.250.6.2 dyoung /*
478 1.250.6.2 dyoung * Ip input routine. Checksum and byte swap header. If fragmented
479 1.250.6.2 dyoung * try to reassemble. Process options. Pass to next level.
480 1.250.6.2 dyoung */
481 1.250.6.2 dyoung void
482 1.250.6.2 dyoung ip_input(struct mbuf *m)
483 1.250.6.2 dyoung {
484 1.250.6.2 dyoung struct ip *ip = NULL;
485 1.250.6.2 dyoung struct ipq *fp;
486 1.250.6.2 dyoung struct in_ifaddr *ia;
487 1.250.6.2 dyoung struct ifaddr *ifa;
488 1.250.6.2 dyoung struct ipqent *ipqe;
489 1.250.6.2 dyoung int hlen = 0, mff, len;
490 1.250.6.2 dyoung int downmatch;
491 1.250.6.2 dyoung int checkif;
492 1.250.6.2 dyoung int srcrt = 0;
493 1.250.6.2 dyoung int s;
494 1.250.6.2 dyoung u_int hash;
495 1.250.6.2 dyoung #ifdef FAST_IPSEC
496 1.250.6.2 dyoung struct m_tag *mtag;
497 1.250.6.2 dyoung struct tdb_ident *tdbi;
498 1.250.6.2 dyoung struct secpolicy *sp;
499 1.250.6.2 dyoung int error;
500 1.250.6.2 dyoung #endif /* FAST_IPSEC */
501 1.250.6.2 dyoung
502 1.250.6.2 dyoung MCLAIM(m, &ip_rx_mowner);
503 1.250.6.2 dyoung #ifdef DIAGNOSTIC
504 1.250.6.2 dyoung if ((m->m_flags & M_PKTHDR) == 0)
505 1.250.6.2 dyoung panic("ipintr no HDR");
506 1.250.6.2 dyoung #endif
507 1.250.6.2 dyoung
508 1.250.6.2 dyoung /*
509 1.250.6.2 dyoung * If no IP addresses have been set yet but the interfaces
510 1.250.6.2 dyoung * are receiving, can't do anything with incoming packets yet.
511 1.250.6.2 dyoung */
512 1.250.6.2 dyoung if (TAILQ_FIRST(&in_ifaddrhead) == 0)
513 1.250.6.2 dyoung goto bad;
514 1.250.6.2 dyoung ipstat.ips_total++;
515 1.250.6.2 dyoung /*
516 1.250.6.2 dyoung * If the IP header is not aligned, slurp it up into a new
517 1.250.6.2 dyoung * mbuf with space for link headers, in the event we forward
518 1.250.6.2 dyoung * it. Otherwise, if it is aligned, make sure the entire
519 1.250.6.2 dyoung * base IP header is in the first mbuf of the chain.
520 1.250.6.2 dyoung */
521 1.250.6.2 dyoung if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
522 1.250.6.2 dyoung if ((m = m_copyup(m, sizeof(struct ip),
523 1.250.6.2 dyoung (max_linkhdr + 3) & ~3)) == NULL) {
524 1.250.6.2 dyoung /* XXXJRT new stat, please */
525 1.250.6.2 dyoung ipstat.ips_toosmall++;
526 1.250.6.2 dyoung return;
527 1.250.6.2 dyoung }
528 1.250.6.2 dyoung } else if (__predict_false(m->m_len < sizeof (struct ip))) {
529 1.250.6.2 dyoung if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
530 1.250.6.2 dyoung ipstat.ips_toosmall++;
531 1.250.6.2 dyoung return;
532 1.250.6.2 dyoung }
533 1.250.6.2 dyoung }
534 1.250.6.2 dyoung ip = mtod(m, struct ip *);
535 1.250.6.2 dyoung if (ip->ip_v != IPVERSION) {
536 1.250.6.2 dyoung ipstat.ips_badvers++;
537 1.250.6.2 dyoung goto bad;
538 1.250.6.2 dyoung }
539 1.250.6.2 dyoung hlen = ip->ip_hl << 2;
540 1.250.6.2 dyoung if (hlen < sizeof(struct ip)) { /* minimum header length */
541 1.250.6.2 dyoung ipstat.ips_badhlen++;
542 1.250.6.2 dyoung goto bad;
543 1.250.6.2 dyoung }
544 1.250.6.2 dyoung if (hlen > m->m_len) {
545 1.250.6.2 dyoung if ((m = m_pullup(m, hlen)) == 0) {
546 1.250.6.2 dyoung ipstat.ips_badhlen++;
547 1.250.6.2 dyoung return;
548 1.250.6.2 dyoung }
549 1.250.6.2 dyoung ip = mtod(m, struct ip *);
550 1.250.6.2 dyoung }
551 1.250.6.2 dyoung
552 1.250.6.2 dyoung /*
553 1.250.6.2 dyoung * RFC1122: packets with a multicast source address are
554 1.250.6.2 dyoung * not allowed.
555 1.250.6.2 dyoung */
556 1.250.6.2 dyoung if (IN_MULTICAST(ip->ip_src.s_addr)) {
557 1.250.6.2 dyoung ipstat.ips_badaddr++;
558 1.250.6.2 dyoung goto bad;
559 1.250.6.2 dyoung }
560 1.250.6.2 dyoung
561 1.250.6.2 dyoung /* 127/8 must not appear on wire - RFC1122 */
562 1.250.6.2 dyoung if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
563 1.250.6.2 dyoung (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
564 1.250.6.2 dyoung if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
565 1.250.6.2 dyoung ipstat.ips_badaddr++;
566 1.250.6.2 dyoung goto bad;
567 1.250.6.2 dyoung }
568 1.250.6.2 dyoung }
569 1.250.6.2 dyoung
570 1.250.6.2 dyoung switch (m->m_pkthdr.csum_flags &
571 1.250.6.2 dyoung ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
572 1.250.6.2 dyoung M_CSUM_IPv4_BAD)) {
573 1.250.6.2 dyoung case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
574 1.250.6.2 dyoung INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
575 1.250.6.2 dyoung goto badcsum;
576 1.250.6.2 dyoung
577 1.250.6.2 dyoung case M_CSUM_IPv4:
578 1.250.6.2 dyoung /* Checksum was okay. */
579 1.250.6.2 dyoung INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
580 1.250.6.2 dyoung break;
581 1.250.6.2 dyoung
582 1.250.6.2 dyoung default:
583 1.250.6.2 dyoung /*
584 1.250.6.2 dyoung * Must compute it ourselves. Maybe skip checksum on
585 1.250.6.2 dyoung * loopback interfaces.
586 1.250.6.2 dyoung */
587 1.250.6.2 dyoung if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
588 1.250.6.2 dyoung IFF_LOOPBACK) || ip_do_loopback_cksum)) {
589 1.250.6.2 dyoung INET_CSUM_COUNTER_INCR(&ip_swcsum);
590 1.250.6.2 dyoung if (in_cksum(m, hlen) != 0)
591 1.250.6.2 dyoung goto badcsum;
592 1.250.6.2 dyoung }
593 1.250.6.2 dyoung break;
594 1.250.6.2 dyoung }
595 1.250.6.2 dyoung
596 1.250.6.2 dyoung /* Retrieve the packet length. */
597 1.250.6.2 dyoung len = ntohs(ip->ip_len);
598 1.250.6.2 dyoung
599 1.250.6.2 dyoung /*
600 1.250.6.2 dyoung * Check for additional length bogosity
601 1.250.6.2 dyoung */
602 1.250.6.2 dyoung if (len < hlen) {
603 1.250.6.2 dyoung ipstat.ips_badlen++;
604 1.250.6.2 dyoung goto bad;
605 1.250.6.2 dyoung }
606 1.250.6.2 dyoung
607 1.250.6.2 dyoung /*
608 1.250.6.2 dyoung * Check that the amount of data in the buffers
609 1.250.6.2 dyoung * is at least as much as the IP header would have us expect.
610 1.250.6.2 dyoung * Trim mbufs if longer than we expect.
611 1.250.6.2 dyoung * Drop packet if shorter than we expect.
612 1.250.6.2 dyoung */
613 1.250.6.2 dyoung if (m->m_pkthdr.len < len) {
614 1.250.6.2 dyoung ipstat.ips_tooshort++;
615 1.250.6.2 dyoung goto bad;
616 1.250.6.2 dyoung }
617 1.250.6.2 dyoung if (m->m_pkthdr.len > len) {
618 1.250.6.2 dyoung if (m->m_len == m->m_pkthdr.len) {
619 1.250.6.2 dyoung m->m_len = len;
620 1.250.6.2 dyoung m->m_pkthdr.len = len;
621 1.250.6.2 dyoung } else
622 1.250.6.2 dyoung m_adj(m, len - m->m_pkthdr.len);
623 1.250.6.2 dyoung }
624 1.250.6.2 dyoung
625 1.250.6.2 dyoung #if defined(IPSEC)
626 1.250.6.2 dyoung /* ipflow (IP fast forwarding) is not compatible with IPsec. */
627 1.250.6.2 dyoung m->m_flags &= ~M_CANFASTFWD;
628 1.250.6.2 dyoung #else
629 1.250.6.2 dyoung /*
630 1.250.6.2 dyoung * Assume that we can create a fast-forward IP flow entry
631 1.250.6.2 dyoung * based on this packet.
632 1.250.6.2 dyoung */
633 1.250.6.2 dyoung m->m_flags |= M_CANFASTFWD;
634 1.250.6.2 dyoung #endif
635 1.250.6.2 dyoung
636 1.250.6.2 dyoung #ifdef PFIL_HOOKS
637 1.250.6.2 dyoung /*
638 1.250.6.2 dyoung * Run through list of hooks for input packets. If there are any
639 1.250.6.2 dyoung * filters which require that additional packets in the flow are
640 1.250.6.2 dyoung * not fast-forwarded, they must clear the M_CANFASTFWD flag.
641 1.250.6.2 dyoung * Note that filters must _never_ set this flag, as another filter
642 1.250.6.2 dyoung * in the list may have previously cleared it.
643 1.250.6.2 dyoung */
644 1.250.6.2 dyoung /*
645 1.250.6.2 dyoung * let ipfilter look at packet on the wire,
646 1.250.6.2 dyoung * not the decapsulated packet.
647 1.250.6.2 dyoung */
648 1.250.6.2 dyoung #ifdef IPSEC
649 1.250.6.2 dyoung if (!ipsec_getnhist(m))
650 1.250.6.2 dyoung #elif defined(FAST_IPSEC)
651 1.250.6.2 dyoung if (!ipsec_indone(m))
652 1.250.6.2 dyoung #else
653 1.250.6.2 dyoung if (1)
654 1.250.6.2 dyoung #endif
655 1.250.6.2 dyoung {
656 1.250.6.2 dyoung struct in_addr odst;
657 1.250.6.2 dyoung
658 1.250.6.2 dyoung odst = ip->ip_dst;
659 1.250.6.2 dyoung if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
660 1.250.6.2 dyoung PFIL_IN) != 0)
661 1.250.6.2 dyoung return;
662 1.250.6.2 dyoung if (m == NULL)
663 1.250.6.2 dyoung return;
664 1.250.6.2 dyoung ip = mtod(m, struct ip *);
665 1.250.6.2 dyoung hlen = ip->ip_hl << 2;
666 1.250.6.2 dyoung /*
667 1.250.6.2 dyoung * XXX The setting of "srcrt" here is to prevent ip_forward()
668 1.250.6.2 dyoung * from generating ICMP redirects for packets that have
669 1.250.6.2 dyoung * been redirected by a hook back out on to the same LAN that
670 1.250.6.2 dyoung * they came from and is not an indication that the packet
671 1.250.6.2 dyoung * is being influenced by source routing options. This
672 1.250.6.2 dyoung * allows things like
673 1.250.6.2 dyoung * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp"
674 1.250.6.2 dyoung * where tlp0 is both on the 1.1.1.0/24 network and is the
675 1.250.6.2 dyoung * default route for hosts on 1.1.1.0/24. Of course this
676 1.250.6.2 dyoung * also requires a "map tlp0 ..." to complete the story.
677 1.250.6.2 dyoung * One might argue whether or not this kind of network config.
678 1.250.6.2 dyoung * should be supported in this manner...
679 1.250.6.2 dyoung */
680 1.250.6.2 dyoung srcrt = (odst.s_addr != ip->ip_dst.s_addr);
681 1.250.6.2 dyoung }
682 1.250.6.2 dyoung #endif /* PFIL_HOOKS */
683 1.250.6.2 dyoung
684 1.250.6.2 dyoung #ifdef ALTQ
685 1.250.6.2 dyoung /* XXX Temporary until ALTQ is changed to use a pfil hook */
686 1.250.6.2 dyoung if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
687 1.250.6.2 dyoung /* packet dropped by traffic conditioner */
688 1.250.6.2 dyoung return;
689 1.250.6.2 dyoung }
690 1.250.6.2 dyoung #endif
691 1.250.6.2 dyoung
692 1.250.6.2 dyoung /*
693 1.250.6.2 dyoung * Process options and, if not destined for us,
694 1.250.6.2 dyoung * ship it on. ip_dooptions returns 1 when an
695 1.250.6.2 dyoung * error was detected (causing an icmp message
696 1.250.6.2 dyoung * to be sent and the original packet to be freed).
697 1.250.6.2 dyoung */
698 1.250.6.2 dyoung ip_nhops = 0; /* for source routed packets */
699 1.250.6.2 dyoung if (hlen > sizeof (struct ip) && ip_dooptions(m))
700 1.250.6.2 dyoung return;
701 1.250.6.2 dyoung
702 1.250.6.2 dyoung /*
703 1.250.6.2 dyoung * Enable a consistency check between the destination address
704 1.250.6.2 dyoung * and the arrival interface for a unicast packet (the RFC 1122
705 1.250.6.2 dyoung * strong ES model) if IP forwarding is disabled and the packet
706 1.250.6.2 dyoung * is not locally generated.
707 1.250.6.2 dyoung *
708 1.250.6.2 dyoung * XXX - Checking also should be disabled if the destination
709 1.250.6.2 dyoung * address is ipnat'ed to a different interface.
710 1.250.6.2 dyoung *
711 1.250.6.2 dyoung * XXX - Checking is incompatible with IP aliases added
712 1.250.6.2 dyoung * to the loopback interface instead of the interface where
713 1.250.6.2 dyoung * the packets are received.
714 1.250.6.2 dyoung *
715 1.250.6.2 dyoung * XXX - We need to add a per ifaddr flag for this so that
716 1.250.6.2 dyoung * we get finer grain control.
717 1.250.6.2 dyoung */
718 1.250.6.2 dyoung checkif = ip_checkinterface && (ipforwarding == 0) &&
719 1.250.6.2 dyoung (m->m_pkthdr.rcvif != NULL) &&
720 1.250.6.2 dyoung ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0);
721 1.250.6.2 dyoung
722 1.250.6.2 dyoung /*
723 1.250.6.2 dyoung * Check our list of addresses, to see if the packet is for us.
724 1.250.6.2 dyoung *
725 1.250.6.2 dyoung * Traditional 4.4BSD did not consult IFF_UP at all.
726 1.250.6.2 dyoung * The behavior here is to treat addresses on !IFF_UP interface
727 1.250.6.2 dyoung * as not mine.
728 1.250.6.2 dyoung */
729 1.250.6.2 dyoung downmatch = 0;
730 1.250.6.2 dyoung LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
731 1.250.6.2 dyoung if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
732 1.250.6.2 dyoung if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif)
733 1.250.6.2 dyoung continue;
734 1.250.6.2 dyoung if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
735 1.250.6.2 dyoung break;
736 1.250.6.2 dyoung else
737 1.250.6.2 dyoung downmatch++;
738 1.250.6.2 dyoung }
739 1.250.6.2 dyoung }
740 1.250.6.2 dyoung if (ia != NULL)
741 1.250.6.2 dyoung goto ours;
742 1.250.6.2 dyoung if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
743 1.250.6.2 dyoung IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) {
744 1.250.6.2 dyoung if (ifa->ifa_addr->sa_family != AF_INET)
745 1.250.6.2 dyoung continue;
746 1.250.6.2 dyoung ia = ifatoia(ifa);
747 1.250.6.2 dyoung if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
748 1.250.6.2 dyoung in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
749 1.250.6.2 dyoung /*
750 1.250.6.2 dyoung * Look for all-0's host part (old broadcast addr),
751 1.250.6.2 dyoung * either for subnet or net.
752 1.250.6.2 dyoung */
753 1.250.6.2 dyoung ip->ip_dst.s_addr == ia->ia_subnet ||
754 1.250.6.2 dyoung ip->ip_dst.s_addr == ia->ia_net)
755 1.250.6.2 dyoung goto ours;
756 1.250.6.2 dyoung /*
757 1.250.6.2 dyoung * An interface with IP address zero accepts
758 1.250.6.2 dyoung * all packets that arrive on that interface.
759 1.250.6.2 dyoung */
760 1.250.6.2 dyoung if (in_nullhost(ia->ia_addr.sin_addr))
761 1.250.6.2 dyoung goto ours;
762 1.250.6.2 dyoung }
763 1.250.6.2 dyoung }
764 1.250.6.2 dyoung if (IN_MULTICAST(ip->ip_dst.s_addr)) {
765 1.250.6.2 dyoung struct in_multi *inm;
766 1.250.6.2 dyoung #ifdef MROUTING
767 1.250.6.2 dyoung extern struct socket *ip_mrouter;
768 1.250.6.2 dyoung
769 1.250.6.2 dyoung if (ip_mrouter) {
770 1.250.6.2 dyoung /*
771 1.250.6.2 dyoung * If we are acting as a multicast router, all
772 1.250.6.2 dyoung * incoming multicast packets are passed to the
773 1.250.6.2 dyoung * kernel-level multicast forwarding function.
774 1.250.6.2 dyoung * The packet is returned (relatively) intact; if
775 1.250.6.2 dyoung * ip_mforward() returns a non-zero value, the packet
776 1.250.6.2 dyoung * must be discarded, else it may be accepted below.
777 1.250.6.2 dyoung *
778 1.250.6.2 dyoung * (The IP ident field is put in the same byte order
779 1.250.6.2 dyoung * as expected when ip_mforward() is called from
780 1.250.6.2 dyoung * ip_output().)
781 1.250.6.2 dyoung */
782 1.250.6.2 dyoung if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
783 1.250.6.2 dyoung ipstat.ips_cantforward++;
784 1.250.6.2 dyoung m_freem(m);
785 1.250.6.2 dyoung return;
786 1.250.6.2 dyoung }
787 1.250.6.2 dyoung
788 1.250.6.2 dyoung /*
789 1.250.6.2 dyoung * The process-level routing demon needs to receive
790 1.250.6.2 dyoung * all multicast IGMP packets, whether or not this
791 1.250.6.2 dyoung * host belongs to their destination groups.
792 1.250.6.2 dyoung */
793 1.250.6.2 dyoung if (ip->ip_p == IPPROTO_IGMP)
794 1.250.6.2 dyoung goto ours;
795 1.250.6.2 dyoung ipstat.ips_forward++;
796 1.250.6.2 dyoung }
797 1.250.6.2 dyoung #endif
798 1.250.6.2 dyoung /*
799 1.250.6.2 dyoung * See if we belong to the destination multicast group on the
800 1.250.6.2 dyoung * arrival interface.
801 1.250.6.2 dyoung */
802 1.250.6.2 dyoung IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
803 1.250.6.2 dyoung if (inm == NULL) {
804 1.250.6.2 dyoung ipstat.ips_cantforward++;
805 1.250.6.2 dyoung m_freem(m);
806 1.250.6.2 dyoung return;
807 1.250.6.2 dyoung }
808 1.250.6.2 dyoung goto ours;
809 1.250.6.2 dyoung }
810 1.250.6.2 dyoung if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
811 1.250.6.2 dyoung in_nullhost(ip->ip_dst))
812 1.250.6.2 dyoung goto ours;
813 1.250.6.2 dyoung
814 1.250.6.2 dyoung /*
815 1.250.6.2 dyoung * Not for us; forward if possible and desirable.
816 1.250.6.2 dyoung */
817 1.250.6.2 dyoung if (ipforwarding == 0) {
818 1.250.6.2 dyoung ipstat.ips_cantforward++;
819 1.250.6.2 dyoung m_freem(m);
820 1.250.6.2 dyoung } else {
821 1.250.6.2 dyoung /*
822 1.250.6.2 dyoung * If ip_dst matched any of my address on !IFF_UP interface,
823 1.250.6.2 dyoung * and there's no IFF_UP interface that matches ip_dst,
824 1.250.6.2 dyoung * send icmp unreach. Forwarding it will result in in-kernel
825 1.250.6.2 dyoung * forwarding loop till TTL goes to 0.
826 1.250.6.2 dyoung */
827 1.250.6.2 dyoung if (downmatch) {
828 1.250.6.2 dyoung icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
829 1.250.6.2 dyoung ipstat.ips_cantforward++;
830 1.250.6.2 dyoung return;
831 1.250.6.2 dyoung }
832 1.250.6.2 dyoung #ifdef IPSEC
833 1.250.6.2 dyoung if (ipsec4_in_reject(m, NULL)) {
834 1.250.6.2 dyoung ipsecstat.in_polvio++;
835 1.250.6.2 dyoung goto bad;
836 1.250.6.2 dyoung }
837 1.250.6.2 dyoung #endif
838 1.250.6.2 dyoung #ifdef FAST_IPSEC
839 1.250.6.2 dyoung mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
840 1.250.6.2 dyoung s = splsoftnet();
841 1.250.6.2 dyoung if (mtag != NULL) {
842 1.250.6.2 dyoung tdbi = (struct tdb_ident *)(mtag + 1);
843 1.250.6.2 dyoung sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
844 1.250.6.2 dyoung } else {
845 1.250.6.2 dyoung sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
846 1.250.6.2 dyoung IP_FORWARDING, &error);
847 1.250.6.2 dyoung }
848 1.250.6.2 dyoung if (sp == NULL) { /* NB: can happen if error */
849 1.250.6.2 dyoung splx(s);
850 1.250.6.2 dyoung /*XXX error stat???*/
851 1.250.6.2 dyoung DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
852 1.250.6.2 dyoung goto bad;
853 1.250.6.2 dyoung }
854 1.250.6.2 dyoung
855 1.250.6.2 dyoung /*
856 1.250.6.2 dyoung * Check security policy against packet attributes.
857 1.250.6.2 dyoung */
858 1.250.6.2 dyoung error = ipsec_in_reject(sp, m);
859 1.250.6.2 dyoung KEY_FREESP(&sp);
860 1.250.6.2 dyoung splx(s);
861 1.250.6.2 dyoung if (error) {
862 1.250.6.2 dyoung ipstat.ips_cantforward++;
863 1.250.6.2 dyoung goto bad;
864 1.250.6.2 dyoung }
865 1.250.6.2 dyoung
866 1.250.6.2 dyoung /*
867 1.250.6.2 dyoung * Peek at the outbound SP for this packet to determine if
868 1.250.6.2 dyoung * it's a Fast Forward candidate.
869 1.250.6.2 dyoung */
870 1.250.6.2 dyoung mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
871 1.250.6.2 dyoung if (mtag != NULL)
872 1.250.6.2 dyoung m->m_flags &= ~M_CANFASTFWD;
873 1.250.6.2 dyoung else {
874 1.250.6.2 dyoung s = splsoftnet();
875 1.250.6.2 dyoung sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND,
876 1.250.6.2 dyoung (IP_FORWARDING |
877 1.250.6.2 dyoung (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
878 1.250.6.2 dyoung &error, NULL);
879 1.250.6.2 dyoung if (sp != NULL) {
880 1.250.6.2 dyoung m->m_flags &= ~M_CANFASTFWD;
881 1.250.6.2 dyoung KEY_FREESP(&sp);
882 1.250.6.2 dyoung }
883 1.250.6.2 dyoung splx(s);
884 1.250.6.2 dyoung }
885 1.250.6.2 dyoung #endif /* FAST_IPSEC */
886 1.250.6.2 dyoung
887 1.250.6.2 dyoung ip_forward(m, srcrt);
888 1.250.6.2 dyoung }
889 1.250.6.2 dyoung return;
890 1.250.6.2 dyoung
891 1.250.6.2 dyoung ours:
892 1.250.6.2 dyoung /*
893 1.250.6.2 dyoung * If offset or IP_MF are set, must reassemble.
894 1.250.6.2 dyoung * Otherwise, nothing need be done.
895 1.250.6.2 dyoung * (We could look in the reassembly queue to see
896 1.250.6.2 dyoung * if the packet was previously fragmented,
897 1.250.6.2 dyoung * but it's not worth the time; just let them time out.)
898 1.250.6.2 dyoung */
899 1.250.6.2 dyoung if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
900 1.250.6.2 dyoung
901 1.250.6.2 dyoung /*
902 1.250.6.2 dyoung * Look for queue of fragments
903 1.250.6.2 dyoung * of this datagram.
904 1.250.6.2 dyoung */
905 1.250.6.2 dyoung IPQ_LOCK();
906 1.250.6.2 dyoung hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
907 1.250.6.2 dyoung LIST_FOREACH(fp, &ipq[hash], ipq_q) {
908 1.250.6.2 dyoung if (ip->ip_id == fp->ipq_id &&
909 1.250.6.2 dyoung in_hosteq(ip->ip_src, fp->ipq_src) &&
910 1.250.6.2 dyoung in_hosteq(ip->ip_dst, fp->ipq_dst) &&
911 1.250.6.2 dyoung ip->ip_p == fp->ipq_p)
912 1.250.6.2 dyoung goto found;
913 1.250.6.2 dyoung
914 1.250.6.2 dyoung }
915 1.250.6.2 dyoung fp = 0;
916 1.250.6.2 dyoung found:
917 1.250.6.2 dyoung
918 1.250.6.2 dyoung /*
919 1.250.6.2 dyoung * Adjust ip_len to not reflect header,
920 1.250.6.2 dyoung * set ipqe_mff if more fragments are expected,
921 1.250.6.2 dyoung * convert offset of this to bytes.
922 1.250.6.2 dyoung */
923 1.250.6.2 dyoung ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
924 1.250.6.2 dyoung mff = (ip->ip_off & htons(IP_MF)) != 0;
925 1.250.6.2 dyoung if (mff) {
926 1.250.6.2 dyoung /*
927 1.250.6.2 dyoung * Make sure that fragments have a data length
928 1.250.6.2 dyoung * that's a non-zero multiple of 8 bytes.
929 1.250.6.2 dyoung */
930 1.250.6.2 dyoung if (ntohs(ip->ip_len) == 0 ||
931 1.250.6.2 dyoung (ntohs(ip->ip_len) & 0x7) != 0) {
932 1.250.6.2 dyoung ipstat.ips_badfrags++;
933 1.250.6.2 dyoung IPQ_UNLOCK();
934 1.250.6.2 dyoung goto bad;
935 1.250.6.2 dyoung }
936 1.250.6.2 dyoung }
937 1.250.6.2 dyoung ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
938 1.250.6.2 dyoung
939 1.250.6.2 dyoung /*
940 1.250.6.2 dyoung * If datagram marked as having more fragments
941 1.250.6.2 dyoung * or if this is not the first fragment,
942 1.250.6.2 dyoung * attempt reassembly; if it succeeds, proceed.
943 1.250.6.2 dyoung */
944 1.250.6.2 dyoung if (mff || ip->ip_off != htons(0)) {
945 1.250.6.2 dyoung ipstat.ips_fragments++;
946 1.250.6.2 dyoung s = splvm();
947 1.250.6.2 dyoung ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
948 1.250.6.2 dyoung splx(s);
949 1.250.6.2 dyoung if (ipqe == NULL) {
950 1.250.6.2 dyoung ipstat.ips_rcvmemdrop++;
951 1.250.6.2 dyoung IPQ_UNLOCK();
952 1.250.6.2 dyoung goto bad;
953 1.250.6.2 dyoung }
954 1.250.6.2 dyoung ipqe->ipqe_mff = mff;
955 1.250.6.2 dyoung ipqe->ipqe_m = m;
956 1.250.6.2 dyoung ipqe->ipqe_ip = ip;
957 1.250.6.2 dyoung m = ip_reass(ipqe, fp, &ipq[hash]);
958 1.250.6.2 dyoung if (m == 0) {
959 1.250.6.2 dyoung IPQ_UNLOCK();
960 1.250.6.2 dyoung return;
961 1.250.6.2 dyoung }
962 1.250.6.2 dyoung ipstat.ips_reassembled++;
963 1.250.6.2 dyoung ip = mtod(m, struct ip *);
964 1.250.6.2 dyoung hlen = ip->ip_hl << 2;
965 1.250.6.2 dyoung ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
966 1.250.6.2 dyoung } else
967 1.250.6.2 dyoung if (fp)
968 1.250.6.2 dyoung ip_freef(fp);
969 1.250.6.2 dyoung IPQ_UNLOCK();
970 1.250.6.2 dyoung }
971 1.250.6.2 dyoung
972 1.250.6.2 dyoung #if defined(IPSEC)
973 1.250.6.2 dyoung /*
974 1.250.6.2 dyoung * enforce IPsec policy checking if we are seeing last header.
975 1.250.6.2 dyoung * note that we do not visit this with protocols with pcb layer
976 1.250.6.2 dyoung * code - like udp/tcp/raw ip.
977 1.250.6.2 dyoung */
978 1.250.6.2 dyoung if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
979 1.250.6.2 dyoung ipsec4_in_reject(m, NULL)) {
980 1.250.6.2 dyoung ipsecstat.in_polvio++;
981 1.250.6.2 dyoung goto bad;
982 1.250.6.2 dyoung }
983 1.250.6.2 dyoung #endif
984 1.250.6.2 dyoung #ifdef FAST_IPSEC
985 1.250.6.2 dyoung /*
986 1.250.6.2 dyoung * enforce IPsec policy checking if we are seeing last header.
987 1.250.6.2 dyoung * note that we do not visit this with protocols with pcb layer
988 1.250.6.2 dyoung * code - like udp/tcp/raw ip.
989 1.250.6.2 dyoung */
990 1.250.6.2 dyoung if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
991 1.250.6.2 dyoung /*
992 1.250.6.2 dyoung * Check if the packet has already had IPsec processing
993 1.250.6.2 dyoung * done. If so, then just pass it along. This tag gets
994 1.250.6.2 dyoung * set during AH, ESP, etc. input handling, before the
995 1.250.6.2 dyoung * packet is returned to the ip input queue for delivery.
996 1.250.6.2 dyoung */
997 1.250.6.2 dyoung mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
998 1.250.6.2 dyoung s = splsoftnet();
999 1.250.6.2 dyoung if (mtag != NULL) {
1000 1.250.6.2 dyoung tdbi = (struct tdb_ident *)(mtag + 1);
1001 1.250.6.2 dyoung sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
1002 1.250.6.2 dyoung } else {
1003 1.250.6.2 dyoung sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
1004 1.250.6.2 dyoung IP_FORWARDING, &error);
1005 1.250.6.2 dyoung }
1006 1.250.6.2 dyoung if (sp != NULL) {
1007 1.250.6.2 dyoung /*
1008 1.250.6.2 dyoung * Check security policy against packet attributes.
1009 1.250.6.2 dyoung */
1010 1.250.6.2 dyoung error = ipsec_in_reject(sp, m);
1011 1.250.6.2 dyoung KEY_FREESP(&sp);
1012 1.250.6.2 dyoung } else {
1013 1.250.6.2 dyoung /* XXX error stat??? */
1014 1.250.6.2 dyoung error = EINVAL;
1015 1.250.6.2 dyoung DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
1016 1.250.6.2 dyoung goto bad;
1017 1.250.6.2 dyoung }
1018 1.250.6.2 dyoung splx(s);
1019 1.250.6.2 dyoung if (error)
1020 1.250.6.2 dyoung goto bad;
1021 1.250.6.2 dyoung }
1022 1.250.6.2 dyoung #endif /* FAST_IPSEC */
1023 1.250.6.2 dyoung
1024 1.250.6.2 dyoung /*
1025 1.250.6.2 dyoung * Switch out to protocol's input routine.
1026 1.250.6.2 dyoung */
1027 1.250.6.2 dyoung #if IFA_STATS
1028 1.250.6.2 dyoung if (ia && ip)
1029 1.250.6.2 dyoung ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
1030 1.250.6.2 dyoung #endif
1031 1.250.6.2 dyoung ipstat.ips_delivered++;
1032 1.250.6.2 dyoung {
1033 1.250.6.2 dyoung int off = hlen, nh = ip->ip_p;
1034 1.250.6.2 dyoung
1035 1.250.6.2 dyoung (*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
1036 1.250.6.2 dyoung return;
1037 1.250.6.2 dyoung }
1038 1.250.6.2 dyoung bad:
1039 1.250.6.2 dyoung m_freem(m);
1040 1.250.6.2 dyoung return;
1041 1.250.6.2 dyoung
1042 1.250.6.2 dyoung badcsum:
1043 1.250.6.2 dyoung ipstat.ips_badsum++;
1044 1.250.6.2 dyoung m_freem(m);
1045 1.250.6.2 dyoung }
1046 1.250.6.2 dyoung
1047 1.250.6.2 dyoung /*
1048 1.250.6.2 dyoung * Take incoming datagram fragment and try to
1049 1.250.6.2 dyoung * reassemble it into whole datagram. If a chain for
1050 1.250.6.2 dyoung * reassembly of this datagram already exists, then it
1051 1.250.6.2 dyoung * is given as fp; otherwise have to make a chain.
1052 1.250.6.2 dyoung */
1053 1.250.6.2 dyoung struct mbuf *
1054 1.250.6.2 dyoung ip_reass(struct ipqent *ipqe, struct ipq *fp, struct ipqhead *ipqhead)
1055 1.250.6.2 dyoung {
1056 1.250.6.2 dyoung struct mbuf *m = ipqe->ipqe_m;
1057 1.250.6.2 dyoung struct ipqent *nq, *p, *q;
1058 1.250.6.2 dyoung struct ip *ip;
1059 1.250.6.2 dyoung struct mbuf *t;
1060 1.250.6.2 dyoung int hlen = ipqe->ipqe_ip->ip_hl << 2;
1061 1.250.6.2 dyoung int i, next, s;
1062 1.250.6.2 dyoung
1063 1.250.6.2 dyoung IPQ_LOCK_CHECK();
1064 1.250.6.2 dyoung
1065 1.250.6.2 dyoung /*
1066 1.250.6.2 dyoung * Presence of header sizes in mbufs
1067 1.250.6.2 dyoung * would confuse code below.
1068 1.250.6.2 dyoung */
1069 1.250.6.2 dyoung m->m_data += hlen;
1070 1.250.6.2 dyoung m->m_len -= hlen;
1071 1.250.6.2 dyoung
1072 1.250.6.2 dyoung #ifdef notyet
1073 1.250.6.2 dyoung /* make sure fragment limit is up-to-date */
1074 1.250.6.2 dyoung CHECK_NMBCLUSTER_PARAMS();
1075 1.250.6.2 dyoung
1076 1.250.6.2 dyoung /* If we have too many fragments, drop the older half. */
1077 1.250.6.2 dyoung if (ip_nfrags >= ip_maxfrags)
1078 1.250.6.2 dyoung ip_reass_drophalf(void);
1079 1.250.6.2 dyoung #endif
1080 1.250.6.2 dyoung
1081 1.250.6.2 dyoung /*
1082 1.250.6.2 dyoung * We are about to add a fragment; increment frag count.
1083 1.250.6.2 dyoung */
1084 1.250.6.2 dyoung ip_nfrags++;
1085 1.250.6.2 dyoung
1086 1.250.6.2 dyoung /*
1087 1.250.6.2 dyoung * If first fragment to arrive, create a reassembly queue.
1088 1.250.6.2 dyoung */
1089 1.250.6.2 dyoung if (fp == 0) {
1090 1.250.6.2 dyoung /*
1091 1.250.6.2 dyoung * Enforce upper bound on number of fragmented packets
1092 1.250.6.2 dyoung * for which we attempt reassembly;
1093 1.250.6.2 dyoung * If maxfrag is 0, never accept fragments.
1094 1.250.6.2 dyoung * If maxfrag is -1, accept all fragments without limitation.
1095 1.250.6.2 dyoung */
1096 1.250.6.2 dyoung if (ip_maxfragpackets < 0)
1097 1.250.6.2 dyoung ;
1098 1.250.6.2 dyoung else if (ip_nfragpackets >= ip_maxfragpackets)
1099 1.250.6.2 dyoung goto dropfrag;
1100 1.250.6.2 dyoung ip_nfragpackets++;
1101 1.250.6.2 dyoung MALLOC(fp, struct ipq *, sizeof (struct ipq),
1102 1.250.6.2 dyoung M_FTABLE, M_NOWAIT);
1103 1.250.6.2 dyoung if (fp == NULL)
1104 1.250.6.2 dyoung goto dropfrag;
1105 1.250.6.2 dyoung LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
1106 1.250.6.2 dyoung fp->ipq_nfrags = 1;
1107 1.250.6.2 dyoung fp->ipq_ttl = IPFRAGTTL;
1108 1.250.6.2 dyoung fp->ipq_p = ipqe->ipqe_ip->ip_p;
1109 1.250.6.2 dyoung fp->ipq_id = ipqe->ipqe_ip->ip_id;
1110 1.250.6.2 dyoung TAILQ_INIT(&fp->ipq_fragq);
1111 1.250.6.2 dyoung fp->ipq_src = ipqe->ipqe_ip->ip_src;
1112 1.250.6.2 dyoung fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
1113 1.250.6.2 dyoung p = NULL;
1114 1.250.6.2 dyoung goto insert;
1115 1.250.6.2 dyoung } else {
1116 1.250.6.2 dyoung fp->ipq_nfrags++;
1117 1.250.6.2 dyoung }
1118 1.250.6.2 dyoung
1119 1.250.6.2 dyoung /*
1120 1.250.6.2 dyoung * Find a segment which begins after this one does.
1121 1.250.6.2 dyoung */
1122 1.250.6.2 dyoung for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1123 1.250.6.2 dyoung p = q, q = TAILQ_NEXT(q, ipqe_q))
1124 1.250.6.2 dyoung if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
1125 1.250.6.2 dyoung break;
1126 1.250.6.2 dyoung
1127 1.250.6.2 dyoung /*
1128 1.250.6.2 dyoung * If there is a preceding segment, it may provide some of
1129 1.250.6.2 dyoung * our data already. If so, drop the data from the incoming
1130 1.250.6.2 dyoung * segment. If it provides all of our data, drop us.
1131 1.250.6.2 dyoung */
1132 1.250.6.2 dyoung if (p != NULL) {
1133 1.250.6.2 dyoung i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
1134 1.250.6.2 dyoung ntohs(ipqe->ipqe_ip->ip_off);
1135 1.250.6.2 dyoung if (i > 0) {
1136 1.250.6.2 dyoung if (i >= ntohs(ipqe->ipqe_ip->ip_len))
1137 1.250.6.2 dyoung goto dropfrag;
1138 1.250.6.2 dyoung m_adj(ipqe->ipqe_m, i);
1139 1.250.6.2 dyoung ipqe->ipqe_ip->ip_off =
1140 1.250.6.2 dyoung htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
1141 1.250.6.2 dyoung ipqe->ipqe_ip->ip_len =
1142 1.250.6.2 dyoung htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
1143 1.250.6.2 dyoung }
1144 1.250.6.2 dyoung }
1145 1.250.6.2 dyoung
1146 1.250.6.2 dyoung /*
1147 1.250.6.2 dyoung * While we overlap succeeding segments trim them or,
1148 1.250.6.2 dyoung * if they are completely covered, dequeue them.
1149 1.250.6.2 dyoung */
1150 1.250.6.2 dyoung for (; q != NULL &&
1151 1.250.6.2 dyoung ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
1152 1.250.6.2 dyoung ntohs(q->ipqe_ip->ip_off); q = nq) {
1153 1.250.6.2 dyoung i = (ntohs(ipqe->ipqe_ip->ip_off) +
1154 1.250.6.2 dyoung ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
1155 1.250.6.2 dyoung if (i < ntohs(q->ipqe_ip->ip_len)) {
1156 1.250.6.2 dyoung q->ipqe_ip->ip_len =
1157 1.250.6.2 dyoung htons(ntohs(q->ipqe_ip->ip_len) - i);
1158 1.250.6.2 dyoung q->ipqe_ip->ip_off =
1159 1.250.6.2 dyoung htons(ntohs(q->ipqe_ip->ip_off) + i);
1160 1.250.6.2 dyoung m_adj(q->ipqe_m, i);
1161 1.250.6.2 dyoung break;
1162 1.250.6.2 dyoung }
1163 1.250.6.2 dyoung nq = TAILQ_NEXT(q, ipqe_q);
1164 1.250.6.2 dyoung m_freem(q->ipqe_m);
1165 1.250.6.2 dyoung TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1166 1.250.6.2 dyoung s = splvm();
1167 1.250.6.2 dyoung pool_put(&ipqent_pool, q);
1168 1.250.6.2 dyoung splx(s);
1169 1.250.6.2 dyoung fp->ipq_nfrags--;
1170 1.250.6.2 dyoung ip_nfrags--;
1171 1.250.6.2 dyoung }
1172 1.250.6.2 dyoung
1173 1.250.6.2 dyoung insert:
1174 1.250.6.2 dyoung /*
1175 1.250.6.2 dyoung * Stick new segment in its place;
1176 1.250.6.2 dyoung * check for complete reassembly.
1177 1.250.6.2 dyoung */
1178 1.250.6.2 dyoung if (p == NULL) {
1179 1.250.6.2 dyoung TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
1180 1.250.6.2 dyoung } else {
1181 1.250.6.2 dyoung TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
1182 1.250.6.2 dyoung }
1183 1.250.6.2 dyoung next = 0;
1184 1.250.6.2 dyoung for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1185 1.250.6.2 dyoung p = q, q = TAILQ_NEXT(q, ipqe_q)) {
1186 1.250.6.2 dyoung if (ntohs(q->ipqe_ip->ip_off) != next)
1187 1.250.6.2 dyoung return (0);
1188 1.250.6.2 dyoung next += ntohs(q->ipqe_ip->ip_len);
1189 1.250.6.2 dyoung }
1190 1.250.6.2 dyoung if (p->ipqe_mff)
1191 1.250.6.2 dyoung return (0);
1192 1.250.6.2 dyoung
1193 1.250.6.2 dyoung /*
1194 1.250.6.2 dyoung * Reassembly is complete. Check for a bogus message size and
1195 1.250.6.2 dyoung * concatenate fragments.
1196 1.250.6.2 dyoung */
1197 1.250.6.2 dyoung q = TAILQ_FIRST(&fp->ipq_fragq);
1198 1.250.6.2 dyoung ip = q->ipqe_ip;
1199 1.250.6.2 dyoung if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
1200 1.250.6.2 dyoung ipstat.ips_toolong++;
1201 1.250.6.2 dyoung ip_freef(fp);
1202 1.250.6.2 dyoung return (0);
1203 1.250.6.2 dyoung }
1204 1.250.6.2 dyoung m = q->ipqe_m;
1205 1.250.6.2 dyoung t = m->m_next;
1206 1.250.6.2 dyoung m->m_next = 0;
1207 1.250.6.2 dyoung m_cat(m, t);
1208 1.250.6.2 dyoung nq = TAILQ_NEXT(q, ipqe_q);
1209 1.250.6.2 dyoung s = splvm();
1210 1.250.6.2 dyoung pool_put(&ipqent_pool, q);
1211 1.250.6.2 dyoung splx(s);
1212 1.250.6.2 dyoung for (q = nq; q != NULL; q = nq) {
1213 1.250.6.2 dyoung t = q->ipqe_m;
1214 1.250.6.2 dyoung nq = TAILQ_NEXT(q, ipqe_q);
1215 1.250.6.2 dyoung s = splvm();
1216 1.250.6.2 dyoung pool_put(&ipqent_pool, q);
1217 1.250.6.2 dyoung splx(s);
1218 1.250.6.2 dyoung m_cat(m, t);
1219 1.250.6.2 dyoung }
1220 1.250.6.2 dyoung ip_nfrags -= fp->ipq_nfrags;
1221 1.250.6.2 dyoung
1222 1.250.6.2 dyoung /*
1223 1.250.6.2 dyoung * Create header for new ip packet by
1224 1.250.6.2 dyoung * modifying header of first packet;
1225 1.250.6.2 dyoung * dequeue and discard fragment reassembly header.
1226 1.250.6.2 dyoung * Make header visible.
1227 1.250.6.2 dyoung */
1228 1.250.6.2 dyoung ip->ip_len = htons(next);
1229 1.250.6.2 dyoung ip->ip_src = fp->ipq_src;
1230 1.250.6.2 dyoung ip->ip_dst = fp->ipq_dst;
1231 1.250.6.2 dyoung LIST_REMOVE(fp, ipq_q);
1232 1.250.6.2 dyoung FREE(fp, M_FTABLE);
1233 1.250.6.2 dyoung ip_nfragpackets--;
1234 1.250.6.2 dyoung m->m_len += (ip->ip_hl << 2);
1235 1.250.6.2 dyoung m->m_data -= (ip->ip_hl << 2);
1236 1.250.6.2 dyoung /* some debugging cruft by sklower, below, will go away soon */
1237 1.250.6.2 dyoung if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
1238 1.250.6.2 dyoung int plen = 0;
1239 1.250.6.2 dyoung for (t = m; t; t = t->m_next)
1240 1.250.6.2 dyoung plen += t->m_len;
1241 1.250.6.2 dyoung m->m_pkthdr.len = plen;
1242 1.250.6.2 dyoung m->m_pkthdr.csum_flags = 0;
1243 1.250.6.2 dyoung }
1244 1.250.6.2 dyoung return (m);
1245 1.250.6.2 dyoung
1246 1.250.6.2 dyoung dropfrag:
1247 1.250.6.2 dyoung if (fp != 0)
1248 1.250.6.2 dyoung fp->ipq_nfrags--;
1249 1.250.6.2 dyoung ip_nfrags--;
1250 1.250.6.2 dyoung ipstat.ips_fragdropped++;
1251 1.250.6.2 dyoung m_freem(m);
1252 1.250.6.2 dyoung s = splvm();
1253 1.250.6.2 dyoung pool_put(&ipqent_pool, ipqe);
1254 1.250.6.2 dyoung splx(s);
1255 1.250.6.2 dyoung return (0);
1256 1.250.6.2 dyoung }
1257 1.250.6.2 dyoung
1258 1.250.6.2 dyoung /*
1259 1.250.6.2 dyoung * Free a fragment reassembly header and all
1260 1.250.6.2 dyoung * associated datagrams.
1261 1.250.6.2 dyoung */
1262 1.250.6.2 dyoung void
1263 1.250.6.2 dyoung ip_freef(struct ipq *fp)
1264 1.250.6.2 dyoung {
1265 1.250.6.2 dyoung struct ipqent *q, *p;
1266 1.250.6.2 dyoung u_int nfrags = 0;
1267 1.250.6.2 dyoung int s;
1268 1.250.6.2 dyoung
1269 1.250.6.2 dyoung IPQ_LOCK_CHECK();
1270 1.250.6.2 dyoung
1271 1.250.6.2 dyoung for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
1272 1.250.6.2 dyoung p = TAILQ_NEXT(q, ipqe_q);
1273 1.250.6.2 dyoung m_freem(q->ipqe_m);
1274 1.250.6.2 dyoung nfrags++;
1275 1.250.6.2 dyoung TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1276 1.250.6.2 dyoung s = splvm();
1277 1.250.6.2 dyoung pool_put(&ipqent_pool, q);
1278 1.250.6.2 dyoung splx(s);
1279 1.250.6.2 dyoung }
1280 1.250.6.2 dyoung
1281 1.250.6.2 dyoung if (nfrags != fp->ipq_nfrags)
1282 1.250.6.2 dyoung printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
1283 1.250.6.2 dyoung ip_nfrags -= nfrags;
1284 1.250.6.2 dyoung LIST_REMOVE(fp, ipq_q);
1285 1.250.6.2 dyoung FREE(fp, M_FTABLE);
1286 1.250.6.2 dyoung ip_nfragpackets--;
1287 1.250.6.2 dyoung }
1288 1.250.6.2 dyoung
1289 1.250.6.2 dyoung /*
1290 1.250.6.2 dyoung * IP reassembly TTL machinery for multiplicative drop.
1291 1.250.6.2 dyoung */
1292 1.250.6.2 dyoung static u_int fragttl_histo[(IPFRAGTTL+1)];
1293 1.250.6.2 dyoung
1294 1.250.6.2 dyoung
1295 1.250.6.2 dyoung /*
1296 1.250.6.2 dyoung * Decrement TTL of all reasembly queue entries by `ticks'.
1297 1.250.6.2 dyoung * Count number of distinct fragments (as opposed to partial, fragmented
1298 1.250.6.2 dyoung * datagrams) in the reassembly queue. While we traverse the entire
1299 1.250.6.2 dyoung * reassembly queue, compute and return the median TTL over all fragments.
1300 1.250.6.2 dyoung */
1301 1.250.6.2 dyoung static u_int
1302 1.250.6.2 dyoung ip_reass_ttl_decr(u_int ticks)
1303 1.250.6.2 dyoung {
1304 1.250.6.2 dyoung u_int nfrags, median, dropfraction, keepfraction;
1305 1.250.6.2 dyoung struct ipq *fp, *nfp;
1306 1.250.6.2 dyoung int i;
1307 1.250.6.2 dyoung
1308 1.250.6.2 dyoung nfrags = 0;
1309 1.250.6.2 dyoung memset(fragttl_histo, 0, sizeof fragttl_histo);
1310 1.250.6.2 dyoung
1311 1.250.6.2 dyoung for (i = 0; i < IPREASS_NHASH; i++) {
1312 1.250.6.2 dyoung for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) {
1313 1.250.6.2 dyoung fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ?
1314 1.250.6.2 dyoung 0 : fp->ipq_ttl - ticks);
1315 1.250.6.2 dyoung nfp = LIST_NEXT(fp, ipq_q);
1316 1.250.6.2 dyoung if (fp->ipq_ttl == 0) {
1317 1.250.6.2 dyoung ipstat.ips_fragtimeout++;
1318 1.250.6.2 dyoung ip_freef(fp);
1319 1.250.6.2 dyoung } else {
1320 1.250.6.2 dyoung nfrags += fp->ipq_nfrags;
1321 1.250.6.2 dyoung fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
1322 1.250.6.2 dyoung }
1323 1.250.6.2 dyoung }
1324 1.250.6.2 dyoung }
1325 1.250.6.2 dyoung
1326 1.250.6.2 dyoung KASSERT(ip_nfrags == nfrags);
1327 1.250.6.2 dyoung
1328 1.250.6.2 dyoung /* Find median (or other drop fraction) in histogram. */
1329 1.250.6.2 dyoung dropfraction = (ip_nfrags / 2);
1330 1.250.6.2 dyoung keepfraction = ip_nfrags - dropfraction;
1331 1.250.6.2 dyoung for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
1332 1.250.6.2 dyoung median += fragttl_histo[i];
1333 1.250.6.2 dyoung if (median >= keepfraction)
1334 1.250.6.2 dyoung break;
1335 1.250.6.2 dyoung }
1336 1.250.6.2 dyoung
1337 1.250.6.2 dyoung /* Return TTL of median (or other fraction). */
1338 1.250.6.2 dyoung return (u_int)i;
1339 1.250.6.2 dyoung }
1340 1.250.6.2 dyoung
1341 1.250.6.2 dyoung void
1342 1.250.6.2 dyoung ip_reass_drophalf(void)
1343 1.250.6.2 dyoung {
1344 1.250.6.2 dyoung
1345 1.250.6.2 dyoung u_int median_ticks;
1346 1.250.6.2 dyoung /*
1347 1.250.6.2 dyoung * Compute median TTL of all fragments, and count frags
1348 1.250.6.2 dyoung * with that TTL or lower (roughly half of all fragments).
1349 1.250.6.2 dyoung */
1350 1.250.6.2 dyoung median_ticks = ip_reass_ttl_decr(0);
1351 1.250.6.2 dyoung
1352 1.250.6.2 dyoung /* Drop half. */
1353 1.250.6.2 dyoung median_ticks = ip_reass_ttl_decr(median_ticks);
1354 1.250.6.2 dyoung
1355 1.250.6.2 dyoung }
1356 1.250.6.2 dyoung
1357 1.250.6.2 dyoung /*
1358 1.250.6.2 dyoung * IP timer processing;
1359 1.250.6.2 dyoung * if a timer expires on a reassembly
1360 1.250.6.2 dyoung * queue, discard it.
1361 1.250.6.2 dyoung */
1362 1.250.6.2 dyoung void
1363 1.250.6.2 dyoung ip_slowtimo(void)
1364 1.250.6.2 dyoung {
1365 1.250.6.2 dyoung static u_int dropscanidx = 0;
1366 1.250.6.2 dyoung u_int i;
1367 1.250.6.2 dyoung u_int median_ttl;
1368 1.250.6.2 dyoung int s = splsoftnet();
1369 1.250.6.2 dyoung
1370 1.250.6.2 dyoung IPQ_LOCK();
1371 1.250.6.2 dyoung
1372 1.250.6.2 dyoung /* Age TTL of all fragments by 1 tick .*/
1373 1.250.6.2 dyoung median_ttl = ip_reass_ttl_decr(1);
1374 1.250.6.2 dyoung
1375 1.250.6.2 dyoung /* make sure fragment limit is up-to-date */
1376 1.250.6.2 dyoung CHECK_NMBCLUSTER_PARAMS();
1377 1.250.6.2 dyoung
1378 1.250.6.2 dyoung /* If we have too many fragments, drop the older half. */
1379 1.250.6.2 dyoung if (ip_nfrags > ip_maxfrags)
1380 1.250.6.2 dyoung ip_reass_ttl_decr(median_ttl);
1381 1.250.6.2 dyoung
1382 1.250.6.2 dyoung /*
1383 1.250.6.2 dyoung * If we are over the maximum number of fragmented packets
1384 1.250.6.2 dyoung * (due to the limit being lowered), drain off
1385 1.250.6.2 dyoung * enough to get down to the new limit. Start draining
1386 1.250.6.2 dyoung * from the reassembly hashqueue most recently drained.
1387 1.250.6.2 dyoung */
1388 1.250.6.2 dyoung if (ip_maxfragpackets < 0)
1389 1.250.6.2 dyoung ;
1390 1.250.6.2 dyoung else {
1391 1.250.6.2 dyoung int wrapped = 0;
1392 1.250.6.2 dyoung
1393 1.250.6.2 dyoung i = dropscanidx;
1394 1.250.6.2 dyoung while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
1395 1.250.6.2 dyoung while (LIST_FIRST(&ipq[i]) != NULL)
1396 1.250.6.2 dyoung ip_freef(LIST_FIRST(&ipq[i]));
1397 1.250.6.2 dyoung if (++i >= IPREASS_NHASH) {
1398 1.250.6.2 dyoung i = 0;
1399 1.250.6.2 dyoung }
1400 1.250.6.2 dyoung /*
1401 1.250.6.2 dyoung * Dont scan forever even if fragment counters are
1402 1.250.6.2 dyoung * wrong: stop after scanning entire reassembly queue.
1403 1.250.6.2 dyoung */
1404 1.250.6.2 dyoung if (i == dropscanidx)
1405 1.250.6.2 dyoung wrapped = 1;
1406 1.250.6.2 dyoung }
1407 1.250.6.2 dyoung dropscanidx = i;
1408 1.250.6.2 dyoung }
1409 1.250.6.2 dyoung IPQ_UNLOCK();
1410 1.250.6.2 dyoung splx(s);
1411 1.250.6.2 dyoung }
1412 1.250.6.2 dyoung
/*
 * Release reassembly memory under mbuf pressure.
 *
 * May be called from a device's interrupt context, so the ipq lock is
 * only tried, never waited for: if it is busy, nothing is done.
 * Otherwise half of the queued fragments are dropped; if more mbufs
 * are still needed we will be called again soon.
 */
void
ip_drain(void)
{

	if (ipq_lock_try() != 0) {
		ip_reass_drophalf();
		IPQ_UNLOCK();
	}
}
1435 1.250.6.2 dyoung
1436 1.250.6.2 dyoung /*
1437 1.250.6.2 dyoung * Do option processing on a datagram,
1438 1.250.6.2 dyoung * possibly discarding it if bad options are encountered,
1439 1.250.6.2 dyoung * or forwarding it if source-routed.
1440 1.250.6.2 dyoung * Returns 1 if packet has been forwarded/freed,
1441 1.250.6.2 dyoung * 0 if the packet should be processed further.
1442 1.250.6.2 dyoung */
1443 1.250.6.2 dyoung int
1444 1.250.6.2 dyoung ip_dooptions(struct mbuf *m)
1445 1.250.6.2 dyoung {
1446 1.250.6.2 dyoung struct ip *ip = mtod(m, struct ip *);
1447 1.250.6.2 dyoung u_char *cp, *cp0;
1448 1.250.6.2 dyoung struct ip_timestamp *ipt;
1449 1.250.6.2 dyoung struct in_ifaddr *ia;
1450 1.250.6.2 dyoung int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1451 1.250.6.2 dyoung struct in_addr dst;
1452 1.250.6.2 dyoung n_time ntime;
1453 1.250.6.2 dyoung
1454 1.250.6.2 dyoung dst = ip->ip_dst;
1455 1.250.6.2 dyoung cp = (u_char *)(ip + 1);
1456 1.250.6.2 dyoung cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1457 1.250.6.2 dyoung for (; cnt > 0; cnt -= optlen, cp += optlen) {
1458 1.250.6.2 dyoung opt = cp[IPOPT_OPTVAL];
1459 1.250.6.2 dyoung if (opt == IPOPT_EOL)
1460 1.250.6.2 dyoung break;
1461 1.250.6.2 dyoung if (opt == IPOPT_NOP)
1462 1.250.6.2 dyoung optlen = 1;
1463 1.250.6.2 dyoung else {
1464 1.250.6.2 dyoung if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1465 1.250.6.2 dyoung code = &cp[IPOPT_OLEN] - (u_char *)ip;
1466 1.250.6.2 dyoung goto bad;
1467 1.250.6.2 dyoung }
1468 1.250.6.2 dyoung optlen = cp[IPOPT_OLEN];
1469 1.250.6.2 dyoung if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1470 1.250.6.2 dyoung code = &cp[IPOPT_OLEN] - (u_char *)ip;
1471 1.250.6.2 dyoung goto bad;
1472 1.250.6.2 dyoung }
1473 1.250.6.2 dyoung }
1474 1.250.6.2 dyoung switch (opt) {
1475 1.250.6.2 dyoung
1476 1.250.6.2 dyoung default:
1477 1.250.6.2 dyoung break;
1478 1.250.6.2 dyoung
1479 1.250.6.2 dyoung /*
1480 1.250.6.2 dyoung * Source routing with record.
1481 1.250.6.2 dyoung * Find interface with current destination address.
1482 1.250.6.2 dyoung * If none on this machine then drop if strictly routed,
1483 1.250.6.2 dyoung * or do nothing if loosely routed.
1484 1.250.6.2 dyoung * Record interface address and bring up next address
1485 1.250.6.2 dyoung * component. If strictly routed make sure next
1486 1.250.6.2 dyoung * address is on directly accessible net.
1487 1.250.6.2 dyoung */
1488 1.250.6.2 dyoung case IPOPT_LSRR:
1489 1.250.6.2 dyoung case IPOPT_SSRR:
1490 1.250.6.2 dyoung if (ip_allowsrcrt == 0) {
1491 1.250.6.2 dyoung type = ICMP_UNREACH;
1492 1.250.6.2 dyoung code = ICMP_UNREACH_NET_PROHIB;
1493 1.250.6.2 dyoung goto bad;
1494 1.250.6.2 dyoung }
1495 1.250.6.2 dyoung if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1496 1.250.6.2 dyoung code = &cp[IPOPT_OLEN] - (u_char *)ip;
1497 1.250.6.2 dyoung goto bad;
1498 1.250.6.2 dyoung }
1499 1.250.6.2 dyoung if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1500 1.250.6.2 dyoung code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1501 1.250.6.2 dyoung goto bad;
1502 1.250.6.2 dyoung }
1503 1.250.6.2 dyoung ipaddr.sin_addr = ip->ip_dst;
1504 1.250.6.2 dyoung ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1505 1.250.6.2 dyoung if (ia == 0) {
1506 1.250.6.2 dyoung if (opt == IPOPT_SSRR) {
1507 1.250.6.2 dyoung type = ICMP_UNREACH;
1508 1.250.6.2 dyoung code = ICMP_UNREACH_SRCFAIL;
1509 1.250.6.2 dyoung goto bad;
1510 1.250.6.2 dyoung }
1511 1.250.6.2 dyoung /*
1512 1.250.6.2 dyoung * Loose routing, and not at next destination
1513 1.250.6.2 dyoung * yet; nothing to do except forward.
1514 1.250.6.2 dyoung */
1515 1.250.6.2 dyoung break;
1516 1.250.6.2 dyoung }
1517 1.250.6.2 dyoung off--; /* 0 origin */
1518 1.250.6.2 dyoung if ((off + sizeof(struct in_addr)) > optlen) {
1519 1.250.6.2 dyoung /*
1520 1.250.6.2 dyoung * End of source route. Should be for us.
1521 1.250.6.2 dyoung */
1522 1.250.6.2 dyoung save_rte(cp, ip->ip_src);
1523 1.250.6.2 dyoung break;
1524 1.250.6.2 dyoung }
1525 1.250.6.2 dyoung /*
1526 1.250.6.2 dyoung * locate outgoing interface
1527 1.250.6.2 dyoung */
1528 1.250.6.2 dyoung bcopy((void *)(cp + off), (void *)&ipaddr.sin_addr,
1529 1.250.6.2 dyoung sizeof(ipaddr.sin_addr));
1530 1.250.6.2 dyoung if (opt == IPOPT_SSRR)
1531 1.250.6.2 dyoung ia = ifatoia(ifa_ifwithladdr(sintosa(&ipaddr)));
1532 1.250.6.2 dyoung else
1533 1.250.6.2 dyoung ia = ip_rtaddr(ipaddr.sin_addr);
1534 1.250.6.2 dyoung if (ia == 0) {
1535 1.250.6.2 dyoung type = ICMP_UNREACH;
1536 1.250.6.2 dyoung code = ICMP_UNREACH_SRCFAIL;
1537 1.250.6.2 dyoung goto bad;
1538 1.250.6.2 dyoung }
1539 1.250.6.2 dyoung ip->ip_dst = ipaddr.sin_addr;
1540 1.250.6.2 dyoung bcopy((void *)&ia->ia_addr.sin_addr,
1541 1.250.6.2 dyoung (void *)(cp + off), sizeof(struct in_addr));
1542 1.250.6.2 dyoung cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1543 1.250.6.2 dyoung /*
1544 1.250.6.2 dyoung * Let ip_intr's mcast routing check handle mcast pkts
1545 1.250.6.2 dyoung */
1546 1.250.6.2 dyoung forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1547 1.250.6.2 dyoung break;
1548 1.250.6.2 dyoung
1549 1.250.6.2 dyoung case IPOPT_RR:
1550 1.250.6.2 dyoung if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1551 1.250.6.2 dyoung code = &cp[IPOPT_OLEN] - (u_char *)ip;
1552 1.250.6.2 dyoung goto bad;
1553 1.250.6.2 dyoung }
1554 1.250.6.2 dyoung if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1555 1.250.6.2 dyoung code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1556 1.250.6.2 dyoung goto bad;
1557 1.250.6.2 dyoung }
1558 1.250.6.2 dyoung /*
1559 1.250.6.2 dyoung * If no space remains, ignore.
1560 1.250.6.2 dyoung */
1561 1.250.6.2 dyoung off--; /* 0 origin */
1562 1.250.6.2 dyoung if ((off + sizeof(struct in_addr)) > optlen)
1563 1.250.6.2 dyoung break;
1564 1.250.6.2 dyoung bcopy((void *)(&ip->ip_dst), (void *)&ipaddr.sin_addr,
1565 1.250.6.2 dyoung sizeof(ipaddr.sin_addr));
1566 1.250.6.2 dyoung /*
1567 1.250.6.2 dyoung * locate outgoing interface; if we're the destination,
1568 1.250.6.2 dyoung * use the incoming interface (should be same).
1569 1.250.6.2 dyoung */
1570 1.250.6.2 dyoung if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
1571 1.250.6.2 dyoung == NULL &&
1572 1.250.6.2 dyoung (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
1573 1.250.6.2 dyoung type = ICMP_UNREACH;
1574 1.250.6.2 dyoung code = ICMP_UNREACH_HOST;
1575 1.250.6.2 dyoung goto bad;
1576 1.250.6.2 dyoung }
1577 1.250.6.2 dyoung bcopy((void *)&ia->ia_addr.sin_addr,
1578 1.250.6.2 dyoung (void *)(cp + off), sizeof(struct in_addr));
1579 1.250.6.2 dyoung cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1580 1.250.6.2 dyoung break;
1581 1.250.6.2 dyoung
1582 1.250.6.2 dyoung case IPOPT_TS:
1583 1.250.6.2 dyoung code = cp - (u_char *)ip;
1584 1.250.6.2 dyoung ipt = (struct ip_timestamp *)cp;
1585 1.250.6.2 dyoung if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
1586 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_len - (u_char *)ip;
1587 1.250.6.2 dyoung goto bad;
1588 1.250.6.2 dyoung }
1589 1.250.6.2 dyoung if (ipt->ipt_ptr < 5) {
1590 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
1591 1.250.6.2 dyoung goto bad;
1592 1.250.6.2 dyoung }
1593 1.250.6.2 dyoung if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
1594 1.250.6.2 dyoung if (++ipt->ipt_oflw == 0) {
1595 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_ptr -
1596 1.250.6.2 dyoung (u_char *)ip;
1597 1.250.6.2 dyoung goto bad;
1598 1.250.6.2 dyoung }
1599 1.250.6.2 dyoung break;
1600 1.250.6.2 dyoung }
1601 1.250.6.2 dyoung cp0 = (cp + ipt->ipt_ptr - 1);
1602 1.250.6.2 dyoung switch (ipt->ipt_flg) {
1603 1.250.6.2 dyoung
1604 1.250.6.2 dyoung case IPOPT_TS_TSONLY:
1605 1.250.6.2 dyoung break;
1606 1.250.6.2 dyoung
1607 1.250.6.2 dyoung case IPOPT_TS_TSANDADDR:
1608 1.250.6.2 dyoung if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1609 1.250.6.2 dyoung sizeof(struct in_addr) > ipt->ipt_len) {
1610 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_ptr -
1611 1.250.6.2 dyoung (u_char *)ip;
1612 1.250.6.2 dyoung goto bad;
1613 1.250.6.2 dyoung }
1614 1.250.6.2 dyoung ipaddr.sin_addr = dst;
1615 1.250.6.2 dyoung ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
1616 1.250.6.2 dyoung m->m_pkthdr.rcvif));
1617 1.250.6.2 dyoung if (ia == 0)
1618 1.250.6.2 dyoung continue;
1619 1.250.6.2 dyoung bcopy(&ia->ia_addr.sin_addr,
1620 1.250.6.2 dyoung cp0, sizeof(struct in_addr));
1621 1.250.6.2 dyoung ipt->ipt_ptr += sizeof(struct in_addr);
1622 1.250.6.2 dyoung break;
1623 1.250.6.2 dyoung
1624 1.250.6.2 dyoung case IPOPT_TS_PRESPEC:
1625 1.250.6.2 dyoung if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1626 1.250.6.2 dyoung sizeof(struct in_addr) > ipt->ipt_len) {
1627 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_ptr -
1628 1.250.6.2 dyoung (u_char *)ip;
1629 1.250.6.2 dyoung goto bad;
1630 1.250.6.2 dyoung }
1631 1.250.6.2 dyoung bcopy(cp0, &ipaddr.sin_addr,
1632 1.250.6.2 dyoung sizeof(struct in_addr));
1633 1.250.6.2 dyoung if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
1634 1.250.6.2 dyoung == NULL)
1635 1.250.6.2 dyoung continue;
1636 1.250.6.2 dyoung ipt->ipt_ptr += sizeof(struct in_addr);
1637 1.250.6.2 dyoung break;
1638 1.250.6.2 dyoung
1639 1.250.6.2 dyoung default:
1640 1.250.6.2 dyoung /* XXX can't take &ipt->ipt_flg */
1641 1.250.6.2 dyoung code = (u_char *)&ipt->ipt_ptr -
1642 1.250.6.2 dyoung (u_char *)ip + 1;
1643 1.250.6.2 dyoung goto bad;
1644 1.250.6.2 dyoung }
1645 1.250.6.2 dyoung ntime = iptime();
1646 1.250.6.2 dyoung cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
1647 1.250.6.2 dyoung memmove((char *)cp + ipt->ipt_ptr - 1, cp0,
1648 1.250.6.2 dyoung sizeof(n_time));
1649 1.250.6.2 dyoung ipt->ipt_ptr += sizeof(n_time);
1650 1.250.6.2 dyoung }
1651 1.250.6.2 dyoung }
1652 1.250.6.2 dyoung if (forward) {
1653 1.250.6.2 dyoung if (ip_forwsrcrt == 0) {
1654 1.250.6.2 dyoung type = ICMP_UNREACH;
1655 1.250.6.2 dyoung code = ICMP_UNREACH_SRCFAIL;
1656 1.250.6.2 dyoung goto bad;
1657 1.250.6.2 dyoung }
1658 1.250.6.2 dyoung ip_forward(m, 1);
1659 1.250.6.2 dyoung return (1);
1660 1.250.6.2 dyoung }
1661 1.250.6.2 dyoung return (0);
1662 1.250.6.2 dyoung bad:
1663 1.250.6.2 dyoung icmp_error(m, type, code, 0, 0);
1664 1.250.6.2 dyoung ipstat.ips_badoptions++;
1665 1.250.6.2 dyoung return (1);
1666 1.250.6.2 dyoung }
1667 1.250.6.2 dyoung
1668 1.250.6.2 dyoung /*
1669 1.250.6.2 dyoung * Given address of next destination (final or next hop),
1670 1.250.6.2 dyoung * return internet address info of interface to be used to get there.
1671 1.250.6.2 dyoung */
1672 1.250.6.2 dyoung struct in_ifaddr *
1673 1.250.6.2 dyoung ip_rtaddr(struct in_addr dst)
1674 1.250.6.2 dyoung {
1675 1.250.6.2 dyoung struct rtentry *rt;
1676 1.250.6.2 dyoung union {
1677 1.250.6.2 dyoung struct sockaddr dst;
1678 1.250.6.2 dyoung struct sockaddr_in dst4;
1679 1.250.6.2 dyoung } u;
1680 1.250.6.2 dyoung
1681 1.250.6.2 dyoung sockaddr_in_init(&u.dst4, &dst, 0);
1682 1.250.6.2 dyoung
1683 1.250.6.2 dyoung if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL)
1684 1.250.6.2 dyoung return NULL;
1685 1.250.6.2 dyoung
1686 1.250.6.2 dyoung return ifatoia(rt->rt_ifa);
1687 1.250.6.2 dyoung }
1688 1.250.6.2 dyoung
1689 1.250.6.2 dyoung /*
1690 1.250.6.2 dyoung * Save incoming source route for use in replies,
1691 1.250.6.2 dyoung * to be picked up later by ip_srcroute if the receiver is interested.
1692 1.250.6.2 dyoung */
1693 1.250.6.2 dyoung void
1694 1.250.6.2 dyoung save_rte(u_char *option, struct in_addr dst)
1695 1.250.6.2 dyoung {
1696 1.250.6.2 dyoung unsigned olen;
1697 1.250.6.2 dyoung
1698 1.250.6.2 dyoung olen = option[IPOPT_OLEN];
1699 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1700 1.250.6.2 dyoung if (ipprintfs)
1701 1.250.6.2 dyoung printf("save_rte: olen %d\n", olen);
1702 1.250.6.2 dyoung #endif /* 0 */
1703 1.250.6.2 dyoung if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1704 1.250.6.2 dyoung return;
1705 1.250.6.2 dyoung bcopy((void *)option, (void *)ip_srcrt.srcopt, olen);
1706 1.250.6.2 dyoung ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1707 1.250.6.2 dyoung ip_srcrt.dst = dst;
1708 1.250.6.2 dyoung }
1709 1.250.6.2 dyoung
1710 1.250.6.2 dyoung /*
1711 1.250.6.2 dyoung * Retrieve incoming source route for use in replies,
1712 1.250.6.2 dyoung * in the same form used by setsockopt.
1713 1.250.6.2 dyoung * The first hop is placed before the options, will be removed later.
1714 1.250.6.2 dyoung */
1715 1.250.6.2 dyoung struct mbuf *
1716 1.250.6.2 dyoung ip_srcroute(void)
1717 1.250.6.2 dyoung {
1718 1.250.6.2 dyoung struct in_addr *p, *q;
1719 1.250.6.2 dyoung struct mbuf *m;
1720 1.250.6.2 dyoung
1721 1.250.6.2 dyoung if (ip_nhops == 0)
1722 1.250.6.2 dyoung return NULL;
1723 1.250.6.2 dyoung m = m_get(M_DONTWAIT, MT_SOOPTS);
1724 1.250.6.2 dyoung if (m == 0)
1725 1.250.6.2 dyoung return NULL;
1726 1.250.6.2 dyoung
1727 1.250.6.2 dyoung MCLAIM(m, &inetdomain.dom_mowner);
1728 1.250.6.2 dyoung #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1729 1.250.6.2 dyoung
1730 1.250.6.2 dyoung /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1731 1.250.6.2 dyoung m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1732 1.250.6.2 dyoung OPTSIZ;
1733 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1734 1.250.6.2 dyoung if (ipprintfs)
1735 1.250.6.2 dyoung printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1736 1.250.6.2 dyoung #endif
1737 1.250.6.2 dyoung
1738 1.250.6.2 dyoung /*
1739 1.250.6.2 dyoung * First save first hop for return route
1740 1.250.6.2 dyoung */
1741 1.250.6.2 dyoung p = &ip_srcrt.route[ip_nhops - 1];
1742 1.250.6.2 dyoung *(mtod(m, struct in_addr *)) = *p--;
1743 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1744 1.250.6.2 dyoung if (ipprintfs)
1745 1.250.6.2 dyoung printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
1746 1.250.6.2 dyoung #endif
1747 1.250.6.2 dyoung
1748 1.250.6.2 dyoung /*
1749 1.250.6.2 dyoung * Copy option fields and padding (nop) to mbuf.
1750 1.250.6.2 dyoung */
1751 1.250.6.2 dyoung ip_srcrt.nop = IPOPT_NOP;
1752 1.250.6.2 dyoung ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1753 1.250.6.2 dyoung memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop,
1754 1.250.6.2 dyoung OPTSIZ);
1755 1.250.6.2 dyoung q = (struct in_addr *)(mtod(m, char *) +
1756 1.250.6.2 dyoung sizeof(struct in_addr) + OPTSIZ);
1757 1.250.6.2 dyoung #undef OPTSIZ
1758 1.250.6.2 dyoung /*
1759 1.250.6.2 dyoung * Record return path as an IP source route,
1760 1.250.6.2 dyoung * reversing the path (pointers are now aligned).
1761 1.250.6.2 dyoung */
1762 1.250.6.2 dyoung while (p >= ip_srcrt.route) {
1763 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1764 1.250.6.2 dyoung if (ipprintfs)
1765 1.250.6.2 dyoung printf(" %x", ntohl(q->s_addr));
1766 1.250.6.2 dyoung #endif
1767 1.250.6.2 dyoung *q++ = *p--;
1768 1.250.6.2 dyoung }
1769 1.250.6.2 dyoung /*
1770 1.250.6.2 dyoung * Last hop goes to final destination.
1771 1.250.6.2 dyoung */
1772 1.250.6.2 dyoung *q = ip_srcrt.dst;
1773 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1774 1.250.6.2 dyoung if (ipprintfs)
1775 1.250.6.2 dyoung printf(" %x\n", ntohl(q->s_addr));
1776 1.250.6.2 dyoung #endif
1777 1.250.6.2 dyoung return (m);
1778 1.250.6.2 dyoung }
1779 1.250.6.2 dyoung
1780 1.250.6.2 dyoung /*
1781 1.250.6.2 dyoung * Strip out IP options, at higher
1782 1.250.6.2 dyoung * level protocol in the kernel.
1783 1.250.6.2 dyoung * Second argument is buffer to which options
1784 1.250.6.2 dyoung * will be moved, and return value is their length.
1785 1.250.6.2 dyoung * XXX should be deleted; last arg currently ignored.
1786 1.250.6.2 dyoung */
1787 1.250.6.2 dyoung void
1788 1.250.6.2 dyoung ip_stripoptions(struct mbuf *m, struct mbuf *mopt)
1789 1.250.6.2 dyoung {
1790 1.250.6.2 dyoung int i;
1791 1.250.6.2 dyoung struct ip *ip = mtod(m, struct ip *);
1792 1.250.6.2 dyoung void *opts;
1793 1.250.6.2 dyoung int olen;
1794 1.250.6.2 dyoung
1795 1.250.6.2 dyoung olen = (ip->ip_hl << 2) - sizeof (struct ip);
1796 1.250.6.2 dyoung opts = (void *)(ip + 1);
1797 1.250.6.2 dyoung i = m->m_len - (sizeof (struct ip) + olen);
1798 1.250.6.2 dyoung memmove(opts, (char *)opts + olen, (unsigned)i);
1799 1.250.6.2 dyoung m->m_len -= olen;
1800 1.250.6.2 dyoung if (m->m_flags & M_PKTHDR)
1801 1.250.6.2 dyoung m->m_pkthdr.len -= olen;
1802 1.250.6.2 dyoung ip->ip_len = htons(ntohs(ip->ip_len) - olen);
1803 1.250.6.2 dyoung ip->ip_hl = sizeof (struct ip) >> 2;
1804 1.250.6.2 dyoung }
1805 1.250.6.2 dyoung
1806 1.250.6.2 dyoung const int inetctlerrmap[PRC_NCMDS] = {
1807 1.250.6.2 dyoung 0, 0, 0, 0,
1808 1.250.6.2 dyoung 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1809 1.250.6.2 dyoung EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1810 1.250.6.2 dyoung EMSGSIZE, EHOSTUNREACH, 0, 0,
1811 1.250.6.2 dyoung 0, 0, 0, 0,
1812 1.250.6.2 dyoung ENOPROTOOPT
1813 1.250.6.2 dyoung };
1814 1.250.6.2 dyoung
1815 1.250.6.2 dyoung /*
1816 1.250.6.2 dyoung * Forward a packet. If some error occurs return the sender
1817 1.250.6.2 dyoung * an icmp packet. Note we can't always generate a meaningful
1818 1.250.6.2 dyoung * icmp message because icmp doesn't have a large enough repertoire
1819 1.250.6.2 dyoung * of codes and types.
1820 1.250.6.2 dyoung *
1821 1.250.6.2 dyoung * If not forwarding, just drop the packet. This could be confusing
1822 1.250.6.2 dyoung * if ipforwarding was zero but some routing protocol was advancing
1823 1.250.6.2 dyoung * us as a gateway to somewhere. However, we must let the routing
1824 1.250.6.2 dyoung * protocol deal with that.
1825 1.250.6.2 dyoung *
1826 1.250.6.2 dyoung * The srcrt parameter indicates whether the packet is being forwarded
1827 1.250.6.2 dyoung * via a source route.
1828 1.250.6.2 dyoung */
1829 1.250.6.2 dyoung void
1830 1.250.6.2 dyoung ip_forward(struct mbuf *m, int srcrt)
1831 1.250.6.2 dyoung {
1832 1.250.6.2 dyoung struct ip *ip = mtod(m, struct ip *);
1833 1.250.6.2 dyoung struct rtentry *rt;
1834 1.250.6.2 dyoung int error, type = 0, code = 0, destmtu = 0;
1835 1.250.6.2 dyoung struct mbuf *mcopy;
1836 1.250.6.2 dyoung n_long dest;
1837 1.250.6.2 dyoung union {
1838 1.250.6.2 dyoung struct sockaddr dst;
1839 1.250.6.2 dyoung struct sockaddr_in dst4;
1840 1.250.6.2 dyoung } u;
1841 1.250.6.2 dyoung
1842 1.250.6.2 dyoung /*
1843 1.250.6.2 dyoung * We are now in the output path.
1844 1.250.6.2 dyoung */
1845 1.250.6.2 dyoung MCLAIM(m, &ip_tx_mowner);
1846 1.250.6.2 dyoung
1847 1.250.6.2 dyoung /*
1848 1.250.6.2 dyoung * Clear any in-bound checksum flags for this packet.
1849 1.250.6.2 dyoung */
1850 1.250.6.2 dyoung m->m_pkthdr.csum_flags = 0;
1851 1.250.6.2 dyoung
1852 1.250.6.2 dyoung dest = 0;
1853 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1854 1.250.6.2 dyoung if (ipprintfs) {
1855 1.250.6.2 dyoung printf("forward: src %s ", inet_ntoa(ip->ip_src));
1856 1.250.6.2 dyoung printf("dst %s ttl %x\n", inet_ntoa(ip->ip_dst), ip->ip_ttl);
1857 1.250.6.2 dyoung }
1858 1.250.6.2 dyoung #endif
1859 1.250.6.2 dyoung if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1860 1.250.6.2 dyoung ipstat.ips_cantforward++;
1861 1.250.6.2 dyoung m_freem(m);
1862 1.250.6.2 dyoung return;
1863 1.250.6.2 dyoung }
1864 1.250.6.2 dyoung if (ip->ip_ttl <= IPTTLDEC) {
1865 1.250.6.2 dyoung icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1866 1.250.6.2 dyoung return;
1867 1.250.6.2 dyoung }
1868 1.250.6.2 dyoung
1869 1.250.6.2 dyoung sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
1870 1.250.6.2 dyoung if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) {
1871 1.250.6.2 dyoung icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0);
1872 1.250.6.2 dyoung return;
1873 1.250.6.2 dyoung }
1874 1.250.6.2 dyoung
1875 1.250.6.2 dyoung /*
1876 1.250.6.2 dyoung * Save at most 68 bytes of the packet in case
1877 1.250.6.2 dyoung * we need to generate an ICMP message to the src.
1878 1.250.6.2 dyoung * Pullup to avoid sharing mbuf cluster between m and mcopy.
1879 1.250.6.2 dyoung */
1880 1.250.6.2 dyoung mcopy = m_copym(m, 0, imin(ntohs(ip->ip_len), 68), M_DONTWAIT);
1881 1.250.6.2 dyoung if (mcopy)
1882 1.250.6.2 dyoung mcopy = m_pullup(mcopy, ip->ip_hl << 2);
1883 1.250.6.2 dyoung
1884 1.250.6.2 dyoung ip->ip_ttl -= IPTTLDEC;
1885 1.250.6.2 dyoung
1886 1.250.6.2 dyoung /*
1887 1.250.6.2 dyoung * If forwarding packet using same interface that it came in on,
1888 1.250.6.2 dyoung * perhaps should send a redirect to sender to shortcut a hop.
1889 1.250.6.2 dyoung * Only send redirect if source is sending directly to us,
1890 1.250.6.2 dyoung * and if packet was not source routed (or has any options).
1891 1.250.6.2 dyoung * Also, don't send redirect if forwarding using a default route
1892 1.250.6.2 dyoung * or a route modified by a redirect.
1893 1.250.6.2 dyoung */
1894 1.250.6.2 dyoung if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1895 1.250.6.2 dyoung (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1896 1.250.6.2 dyoung !in_nullhost(satocsin(rt_getkey(rt))->sin_addr) &&
1897 1.250.6.2 dyoung ipsendredirects && !srcrt) {
1898 1.250.6.2 dyoung if (rt->rt_ifa &&
1899 1.250.6.2 dyoung (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1900 1.250.6.2 dyoung ifatoia(rt->rt_ifa)->ia_subnet) {
1901 1.250.6.2 dyoung if (rt->rt_flags & RTF_GATEWAY)
1902 1.250.6.2 dyoung dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1903 1.250.6.2 dyoung else
1904 1.250.6.2 dyoung dest = ip->ip_dst.s_addr;
1905 1.250.6.2 dyoung /*
1906 1.250.6.2 dyoung * Router requirements says to only send host
1907 1.250.6.2 dyoung * redirects.
1908 1.250.6.2 dyoung */
1909 1.250.6.2 dyoung type = ICMP_REDIRECT;
1910 1.250.6.2 dyoung code = ICMP_REDIRECT_HOST;
1911 1.250.6.2 dyoung #ifdef DIAGNOSTIC
1912 1.250.6.2 dyoung if (ipprintfs)
1913 1.250.6.2 dyoung printf("redirect (%d) to %x\n", code,
1914 1.250.6.2 dyoung (u_int32_t)dest);
1915 1.250.6.2 dyoung #endif
1916 1.250.6.2 dyoung }
1917 1.250.6.2 dyoung }
1918 1.250.6.2 dyoung
1919 1.250.6.2 dyoung error = ip_output(m, NULL, &ipforward_rt,
1920 1.250.6.2 dyoung (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
1921 1.250.6.2 dyoung (struct ip_moptions *)NULL, (struct socket *)NULL);
1922 1.250.6.2 dyoung
1923 1.250.6.2 dyoung if (error)
1924 1.250.6.2 dyoung ipstat.ips_cantforward++;
1925 1.250.6.2 dyoung else {
1926 1.250.6.2 dyoung ipstat.ips_forward++;
1927 1.250.6.2 dyoung if (type)
1928 1.250.6.2 dyoung ipstat.ips_redirectsent++;
1929 1.250.6.2 dyoung else {
1930 1.250.6.2 dyoung if (mcopy) {
1931 1.250.6.2 dyoung #ifdef GATEWAY
1932 1.250.6.2 dyoung if (mcopy->m_flags & M_CANFASTFWD)
1933 1.250.6.2 dyoung ipflow_create(&ipforward_rt, mcopy);
1934 1.250.6.2 dyoung #endif
1935 1.250.6.2 dyoung m_freem(mcopy);
1936 1.250.6.2 dyoung }
1937 1.250.6.2 dyoung return;
1938 1.250.6.2 dyoung }
1939 1.250.6.2 dyoung }
1940 1.250.6.2 dyoung if (mcopy == NULL)
1941 1.250.6.2 dyoung return;
1942 1.250.6.2 dyoung
1943 1.250.6.2 dyoung switch (error) {
1944 1.250.6.2 dyoung
1945 1.250.6.2 dyoung case 0: /* forwarded, but need redirect */
1946 1.250.6.2 dyoung /* type, code set above */
1947 1.250.6.2 dyoung break;
1948 1.250.6.2 dyoung
1949 1.250.6.2 dyoung case ENETUNREACH: /* shouldn't happen, checked above */
1950 1.250.6.2 dyoung case EHOSTUNREACH:
1951 1.250.6.2 dyoung case ENETDOWN:
1952 1.250.6.2 dyoung case EHOSTDOWN:
1953 1.250.6.2 dyoung default:
1954 1.250.6.2 dyoung type = ICMP_UNREACH;
1955 1.250.6.2 dyoung code = ICMP_UNREACH_HOST;
1956 1.250.6.2 dyoung break;
1957 1.250.6.2 dyoung
1958 1.250.6.2 dyoung case EMSGSIZE:
1959 1.250.6.2 dyoung type = ICMP_UNREACH;
1960 1.250.6.2 dyoung code = ICMP_UNREACH_NEEDFRAG;
1961 1.250.6.2 dyoung #if !defined(IPSEC) && !defined(FAST_IPSEC)
1962 1.250.6.2 dyoung if (ipforward_rt.ro_rt != NULL)
1963 1.250.6.2 dyoung destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
1964 1.250.6.2 dyoung #else
1965 1.250.6.2 dyoung /*
1966 1.250.6.2 dyoung * If the packet is routed over IPsec tunnel, tell the
1967 1.250.6.2 dyoung * originator the tunnel MTU.
1968 1.250.6.2 dyoung * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1969 1.250.6.2 dyoung * XXX quickhack!!!
1970 1.250.6.2 dyoung */
1971 1.250.6.2 dyoung if (ipforward_rt.ro_rt != NULL) {
1972 1.250.6.2 dyoung struct secpolicy *sp;
1973 1.250.6.2 dyoung int ipsecerror;
1974 1.250.6.2 dyoung size_t ipsechdr;
1975 1.250.6.2 dyoung struct route *ro;
1976 1.250.6.2 dyoung
1977 1.250.6.2 dyoung sp = ipsec4_getpolicybyaddr(mcopy,
1978 1.250.6.2 dyoung IPSEC_DIR_OUTBOUND, IP_FORWARDING,
1979 1.250.6.2 dyoung &ipsecerror);
1980 1.250.6.2 dyoung
1981 1.250.6.2 dyoung if (sp == NULL)
1982 1.250.6.2 dyoung destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
1983 1.250.6.2 dyoung else {
1984 1.250.6.2 dyoung /* count IPsec header size */
1985 1.250.6.2 dyoung ipsechdr = ipsec4_hdrsiz(mcopy,
1986 1.250.6.2 dyoung IPSEC_DIR_OUTBOUND, NULL);
1987 1.250.6.2 dyoung
1988 1.250.6.2 dyoung /*
1989 1.250.6.2 dyoung * find the correct route for outer IPv4
1990 1.250.6.2 dyoung * header, compute tunnel MTU.
1991 1.250.6.2 dyoung */
1992 1.250.6.2 dyoung
1993 1.250.6.2 dyoung if (sp->req != NULL
1994 1.250.6.2 dyoung && sp->req->sav != NULL
1995 1.250.6.2 dyoung && sp->req->sav->sah != NULL) {
1996 1.250.6.2 dyoung ro = &sp->req->sav->sah->sa_route;
1997 1.250.6.2 dyoung if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1998 1.250.6.2 dyoung destmtu =
1999 1.250.6.2 dyoung ro->ro_rt->rt_rmx.rmx_mtu ?
2000 1.250.6.2 dyoung ro->ro_rt->rt_rmx.rmx_mtu :
2001 1.250.6.2 dyoung ro->ro_rt->rt_ifp->if_mtu;
2002 1.250.6.2 dyoung destmtu -= ipsechdr;
2003 1.250.6.2 dyoung }
2004 1.250.6.2 dyoung }
2005 1.250.6.2 dyoung
2006 1.250.6.2 dyoung #ifdef IPSEC
2007 1.250.6.2 dyoung key_freesp(sp);
2008 1.250.6.2 dyoung #else
2009 1.250.6.2 dyoung KEY_FREESP(&sp);
2010 1.250.6.2 dyoung #endif
2011 1.250.6.2 dyoung }
2012 1.250.6.2 dyoung }
2013 1.250.6.2 dyoung #endif /*IPSEC*/
2014 1.250.6.2 dyoung ipstat.ips_cantfrag++;
2015 1.250.6.2 dyoung break;
2016 1.250.6.2 dyoung
2017 1.250.6.2 dyoung case ENOBUFS:
2018 1.250.6.2 dyoung #if 1
2019 1.250.6.2 dyoung /*
2020 1.250.6.2 dyoung * a router should not generate ICMP_SOURCEQUENCH as
2021 1.250.6.2 dyoung * required in RFC1812 Requirements for IP Version 4 Routers.
2022 1.250.6.2 dyoung * source quench could be a big problem under DoS attacks,
2023 1.250.6.2 dyoung * or if the underlying interface is rate-limited.
2024 1.250.6.2 dyoung */
2025 1.250.6.2 dyoung if (mcopy)
2026 1.250.6.2 dyoung m_freem(mcopy);
2027 1.250.6.2 dyoung return;
2028 1.250.6.2 dyoung #else
2029 1.250.6.2 dyoung type = ICMP_SOURCEQUENCH;
2030 1.250.6.2 dyoung code = 0;
2031 1.250.6.2 dyoung break;
2032 1.250.6.2 dyoung #endif
2033 1.250.6.2 dyoung }
2034 1.250.6.2 dyoung icmp_error(mcopy, type, code, dest, destmtu);
2035 1.250.6.2 dyoung }
2036 1.250.6.2 dyoung
2037 1.250.6.2 dyoung void
2038 1.250.6.2 dyoung ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
2039 1.250.6.2 dyoung struct mbuf *m)
2040 1.250.6.2 dyoung {
2041 1.250.6.2 dyoung
2042 1.250.6.2 dyoung if (inp->inp_socket->so_options & SO_TIMESTAMP) {
2043 1.250.6.2 dyoung struct timeval tv;
2044 1.250.6.2 dyoung
2045 1.250.6.2 dyoung microtime(&tv);
2046 1.250.6.2 dyoung *mp = sbcreatecontrol((void *) &tv, sizeof(tv),
2047 1.250.6.2 dyoung SCM_TIMESTAMP, SOL_SOCKET);
2048 1.250.6.2 dyoung if (*mp)
2049 1.250.6.2 dyoung mp = &(*mp)->m_next;
2050 1.250.6.2 dyoung }
2051 1.250.6.2 dyoung if (inp->inp_flags & INP_RECVDSTADDR) {
2052 1.250.6.2 dyoung *mp = sbcreatecontrol((void *) &ip->ip_dst,
2053 1.250.6.2 dyoung sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2054 1.250.6.2 dyoung if (*mp)
2055 1.250.6.2 dyoung mp = &(*mp)->m_next;
2056 1.250.6.2 dyoung }
2057 1.250.6.2 dyoung #ifdef notyet
2058 1.250.6.2 dyoung /*
2059 1.250.6.2 dyoung * XXX
2060 1.250.6.2 dyoung * Moving these out of udp_input() made them even more broken
2061 1.250.6.2 dyoung * than they already were.
2062 1.250.6.2 dyoung * - fenner (at) parc.xerox.com
2063 1.250.6.2 dyoung */
2064 1.250.6.2 dyoung /* options were tossed already */
2065 1.250.6.2 dyoung if (inp->inp_flags & INP_RECVOPTS) {
2066 1.250.6.2 dyoung *mp = sbcreatecontrol((void *) opts_deleted_above,
2067 1.250.6.2 dyoung sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
2068 1.250.6.2 dyoung if (*mp)
2069 1.250.6.2 dyoung mp = &(*mp)->m_next;
2070 1.250.6.2 dyoung }
2071 1.250.6.2 dyoung /* ip_srcroute doesn't do what we want here, need to fix */
2072 1.250.6.2 dyoung if (inp->inp_flags & INP_RECVRETOPTS) {
2073 1.250.6.2 dyoung *mp = sbcreatecontrol((void *) ip_srcroute(),
2074 1.250.6.2 dyoung sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
2075 1.250.6.2 dyoung if (*mp)
2076 1.250.6.2 dyoung mp = &(*mp)->m_next;
2077 1.250.6.2 dyoung }
2078 1.250.6.2 dyoung #endif
2079 1.250.6.2 dyoung if (inp->inp_flags & INP_RECVIF) {
2080 1.250.6.2 dyoung struct sockaddr_dl sdl;
2081 1.250.6.2 dyoung
2082 1.250.6.2 dyoung sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
2083 1.250.6.2 dyoung sdl.sdl_family = AF_LINK;
2084 1.250.6.2 dyoung sdl.sdl_index = m->m_pkthdr.rcvif ?
2085 1.250.6.2 dyoung m->m_pkthdr.rcvif->if_index : 0;
2086 1.250.6.2 dyoung sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
2087 1.250.6.2 dyoung *mp = sbcreatecontrol((void *) &sdl, sdl.sdl_len,
2088 1.250.6.2 dyoung IP_RECVIF, IPPROTO_IP);
2089 1.250.6.2 dyoung if (*mp)
2090 1.250.6.2 dyoung mp = &(*mp)->m_next;
2091 1.250.6.2 dyoung }
2092 1.250.6.2 dyoung }
2093 1.250.6.2 dyoung
2094 1.250.6.2 dyoung /*
2095 1.250.6.2 dyoung * sysctl helper routine for net.inet.ip.forwsrcrt.
2096 1.250.6.2 dyoung */
2097 1.250.6.2 dyoung static int
2098 1.250.6.2 dyoung sysctl_net_inet_ip_forwsrcrt(SYSCTLFN_ARGS)
2099 1.250.6.2 dyoung {
2100 1.250.6.2 dyoung int error, tmp;
2101 1.250.6.2 dyoung struct sysctlnode node;
2102 1.250.6.2 dyoung
2103 1.250.6.2 dyoung node = *rnode;
2104 1.250.6.2 dyoung tmp = ip_forwsrcrt;
2105 1.250.6.2 dyoung node.sysctl_data = &tmp;
2106 1.250.6.2 dyoung error = sysctl_lookup(SYSCTLFN_CALL(&node));
2107 1.250.6.2 dyoung if (error || newp == NULL)
2108 1.250.6.2 dyoung return (error);
2109 1.250.6.2 dyoung
2110 1.250.6.2 dyoung if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_FORWSRCRT,
2111 1.250.6.2 dyoung 0, NULL, NULL, NULL))
2112 1.250.6.2 dyoung return (EPERM);
2113 1.250.6.2 dyoung
2114 1.250.6.2 dyoung ip_forwsrcrt = tmp;
2115 1.250.6.2 dyoung
2116 1.250.6.2 dyoung return (0);
2117 1.250.6.2 dyoung }
2118 1.250.6.2 dyoung
2119 1.250.6.2 dyoung /*
2120 1.250.6.2 dyoung * sysctl helper routine for net.inet.ip.mtudisctimeout. checks the
2121 1.250.6.2 dyoung * range of the new value and tweaks timers if it changes.
2122 1.250.6.2 dyoung */
2123 1.250.6.2 dyoung static int
2124 1.250.6.2 dyoung sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS)
2125 1.250.6.2 dyoung {
2126 1.250.6.2 dyoung int error, tmp;
2127 1.250.6.2 dyoung struct sysctlnode node;
2128 1.250.6.2 dyoung
2129 1.250.6.2 dyoung node = *rnode;
2130 1.250.6.2 dyoung tmp = ip_mtudisc_timeout;
2131 1.250.6.2 dyoung node.sysctl_data = &tmp;
2132 1.250.6.2 dyoung error = sysctl_lookup(SYSCTLFN_CALL(&node));
2133 1.250.6.2 dyoung if (error || newp == NULL)
2134 1.250.6.2 dyoung return (error);
2135 1.250.6.2 dyoung if (tmp < 0)
2136 1.250.6.2 dyoung return (EINVAL);
2137 1.250.6.2 dyoung
2138 1.250.6.2 dyoung ip_mtudisc_timeout = tmp;
2139 1.250.6.2 dyoung rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout);
2140 1.250.6.2 dyoung
2141 1.250.6.2 dyoung return (0);
2142 1.250.6.2 dyoung }
2143 1.250.6.2 dyoung
2144 1.250.6.2 dyoung #ifdef GATEWAY
2145 1.250.6.2 dyoung /*
2146 1.250.6.2 dyoung * sysctl helper routine for net.inet.ip.maxflows.
2147 1.250.6.2 dyoung */
2148 1.250.6.2 dyoung static int
2149 1.250.6.2 dyoung sysctl_net_inet_ip_maxflows(SYSCTLFN_ARGS)
2150 1.250.6.2 dyoung {
2151 1.250.6.2 dyoung int s;
2152 1.250.6.2 dyoung
2153 1.250.6.2 dyoung s = sysctl_lookup(SYSCTLFN_CALL(rnode));
2154 1.250.6.2 dyoung if (s || newp == NULL)
2155 1.250.6.2 dyoung return (s);
2156 1.250.6.2 dyoung
2157 1.250.6.2 dyoung s = splsoftnet();
2158 1.250.6.2 dyoung ipflow_reap(0);
2159 1.250.6.2 dyoung splx(s);
2160 1.250.6.2 dyoung
2161 1.250.6.2 dyoung return (0);
2162 1.250.6.2 dyoung }
2163 1.250.6.2 dyoung
2164 1.250.6.2 dyoung static int
2165 1.250.6.2 dyoung sysctl_net_inet_ip_hashsize(SYSCTLFN_ARGS)
2166 1.250.6.2 dyoung {
2167 1.250.6.2 dyoung int error, tmp;
2168 1.250.6.2 dyoung struct sysctlnode node;
2169 1.250.6.2 dyoung
2170 1.250.6.2 dyoung node = *rnode;
2171 1.250.6.2 dyoung tmp = ip_hashsize;
2172 1.250.6.2 dyoung node.sysctl_data = &tmp;
2173 1.250.6.2 dyoung error = sysctl_lookup(SYSCTLFN_CALL(&node));
2174 1.250.6.2 dyoung if (error || newp == NULL)
2175 1.250.6.2 dyoung return (error);
2176 1.250.6.2 dyoung
2177 1.250.6.2 dyoung if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
2178 1.250.6.2 dyoung /*
2179 1.250.6.2 dyoung * Can only fail due to malloc()
2180 1.250.6.2 dyoung */
2181 1.250.6.2 dyoung if (ipflow_invalidate_all(tmp))
2182 1.250.6.2 dyoung return ENOMEM;
2183 1.250.6.2 dyoung } else {
2184 1.250.6.2 dyoung /*
2185 1.250.6.2 dyoung * EINVAL if not a power of 2
2186 1.250.6.2 dyoung */
2187 1.250.6.2 dyoung return EINVAL;
2188 1.250.6.2 dyoung }
2189 1.250.6.2 dyoung
2190 1.250.6.2 dyoung return (0);
2191 1.250.6.2 dyoung }
2192 1.250.6.2 dyoung #endif /* GATEWAY */
2193 1.250.6.2 dyoung
2194 1.250.6.2 dyoung
2195 1.250.6.2 dyoung SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup")
2196 1.250.6.2 dyoung {
2197 1.250.6.2 dyoung extern int subnetsarelocal, hostzeroisbroadcast;
2198 1.250.6.2 dyoung
2199 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2200 1.250.6.2 dyoung CTLFLAG_PERMANENT,
2201 1.250.6.2 dyoung CTLTYPE_NODE, "net", NULL,
2202 1.250.6.2 dyoung NULL, 0, NULL, 0,
2203 1.250.6.2 dyoung CTL_NET, CTL_EOL);
2204 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2205 1.250.6.2 dyoung CTLFLAG_PERMANENT,
2206 1.250.6.2 dyoung CTLTYPE_NODE, "inet",
2207 1.250.6.2 dyoung SYSCTL_DESCR("PF_INET related settings"),
2208 1.250.6.2 dyoung NULL, 0, NULL, 0,
2209 1.250.6.2 dyoung CTL_NET, PF_INET, CTL_EOL);
2210 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2211 1.250.6.2 dyoung CTLFLAG_PERMANENT,
2212 1.250.6.2 dyoung CTLTYPE_NODE, "ip",
2213 1.250.6.2 dyoung SYSCTL_DESCR("IPv4 related settings"),
2214 1.250.6.2 dyoung NULL, 0, NULL, 0,
2215 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
2216 1.250.6.2 dyoung
2217 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2218 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2219 1.250.6.2 dyoung CTLTYPE_INT, "forwarding",
2220 1.250.6.2 dyoung SYSCTL_DESCR("Enable forwarding of INET datagrams"),
2221 1.250.6.2 dyoung NULL, 0, &ipforwarding, 0,
2222 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2223 1.250.6.2 dyoung IPCTL_FORWARDING, CTL_EOL);
2224 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2225 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2226 1.250.6.2 dyoung CTLTYPE_INT, "redirect",
2227 1.250.6.2 dyoung SYSCTL_DESCR("Enable sending of ICMP redirect messages"),
2228 1.250.6.2 dyoung NULL, 0, &ipsendredirects, 0,
2229 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2230 1.250.6.2 dyoung IPCTL_SENDREDIRECTS, CTL_EOL);
2231 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2232 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2233 1.250.6.2 dyoung CTLTYPE_INT, "ttl",
2234 1.250.6.2 dyoung SYSCTL_DESCR("Default TTL for an INET datagram"),
2235 1.250.6.2 dyoung NULL, 0, &ip_defttl, 0,
2236 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2237 1.250.6.2 dyoung IPCTL_DEFTTL, CTL_EOL);
2238 1.250.6.2 dyoung #ifdef IPCTL_DEFMTU
2239 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2240 1.250.6.2 dyoung CTLFLAG_PERMANENT /* |CTLFLAG_READWRITE? */,
2241 1.250.6.2 dyoung CTLTYPE_INT, "mtu",
2242 1.250.6.2 dyoung SYSCTL_DESCR("Default MTA for an INET route"),
2243 1.250.6.2 dyoung NULL, 0, &ip_mtu, 0,
2244 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2245 1.250.6.2 dyoung IPCTL_DEFMTU, CTL_EOL);
2246 1.250.6.2 dyoung #endif /* IPCTL_DEFMTU */
2247 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2248 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2249 1.250.6.2 dyoung CTLTYPE_INT, "forwsrcrt",
2250 1.250.6.2 dyoung SYSCTL_DESCR("Enable forwarding of source-routed "
2251 1.250.6.2 dyoung "datagrams"),
2252 1.250.6.2 dyoung sysctl_net_inet_ip_forwsrcrt, 0, &ip_forwsrcrt, 0,
2253 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2254 1.250.6.2 dyoung IPCTL_FORWSRCRT, CTL_EOL);
2255 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2256 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2257 1.250.6.2 dyoung CTLTYPE_INT, "directed-broadcast",
2258 1.250.6.2 dyoung SYSCTL_DESCR("Enable forwarding of broadcast datagrams"),
2259 1.250.6.2 dyoung NULL, 0, &ip_directedbcast, 0,
2260 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2261 1.250.6.2 dyoung IPCTL_DIRECTEDBCAST, CTL_EOL);
2262 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2263 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2264 1.250.6.2 dyoung CTLTYPE_INT, "allowsrcrt",
2265 1.250.6.2 dyoung SYSCTL_DESCR("Accept source-routed datagrams"),
2266 1.250.6.2 dyoung NULL, 0, &ip_allowsrcrt, 0,
2267 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2268 1.250.6.2 dyoung IPCTL_ALLOWSRCRT, CTL_EOL);
2269 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2270 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2271 1.250.6.2 dyoung CTLTYPE_INT, "subnetsarelocal",
2272 1.250.6.2 dyoung SYSCTL_DESCR("Whether logical subnets are considered "
2273 1.250.6.2 dyoung "local"),
2274 1.250.6.2 dyoung NULL, 0, &subnetsarelocal, 0,
2275 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2276 1.250.6.2 dyoung IPCTL_SUBNETSARELOCAL, CTL_EOL);
2277 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2278 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2279 1.250.6.2 dyoung CTLTYPE_INT, "mtudisc",
2280 1.250.6.2 dyoung SYSCTL_DESCR("Use RFC1191 Path MTU Discovery"),
2281 1.250.6.2 dyoung NULL, 0, &ip_mtudisc, 0,
2282 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2283 1.250.6.2 dyoung IPCTL_MTUDISC, CTL_EOL);
2284 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2285 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2286 1.250.6.2 dyoung CTLTYPE_INT, "anonportmin",
2287 1.250.6.2 dyoung SYSCTL_DESCR("Lowest ephemeral port number to assign"),
2288 1.250.6.2 dyoung sysctl_net_inet_ip_ports, 0, &anonportmin, 0,
2289 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2290 1.250.6.2 dyoung IPCTL_ANONPORTMIN, CTL_EOL);
2291 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2292 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2293 1.250.6.2 dyoung CTLTYPE_INT, "anonportmax",
2294 1.250.6.2 dyoung SYSCTL_DESCR("Highest ephemeral port number to assign"),
2295 1.250.6.2 dyoung sysctl_net_inet_ip_ports, 0, &anonportmax, 0,
2296 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2297 1.250.6.2 dyoung IPCTL_ANONPORTMAX, CTL_EOL);
2298 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2299 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2300 1.250.6.2 dyoung CTLTYPE_INT, "mtudisctimeout",
2301 1.250.6.2 dyoung SYSCTL_DESCR("Lifetime of a Path MTU Discovered route"),
2302 1.250.6.2 dyoung sysctl_net_inet_ip_pmtudto, 0, &ip_mtudisc_timeout, 0,
2303 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2304 1.250.6.2 dyoung IPCTL_MTUDISCTIMEOUT, CTL_EOL);
2305 1.250.6.2 dyoung #ifdef GATEWAY
2306 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2307 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2308 1.250.6.2 dyoung CTLTYPE_INT, "maxflows",
2309 1.250.6.2 dyoung SYSCTL_DESCR("Number of flows for fast forwarding"),
2310 1.250.6.2 dyoung sysctl_net_inet_ip_maxflows, 0, &ip_maxflows, 0,
2311 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2312 1.250.6.2 dyoung IPCTL_MAXFLOWS, CTL_EOL);
2313 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2314 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2315 1.250.6.2 dyoung CTLTYPE_INT, "hashsize",
2316 1.250.6.2 dyoung SYSCTL_DESCR("Size of hash table for fast forwarding (IPv4)"),
2317 1.250.6.2 dyoung sysctl_net_inet_ip_hashsize, 0, &ip_hashsize, 0,
2318 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2319 1.250.6.2 dyoung CTL_CREATE, CTL_EOL);
2320 1.250.6.2 dyoung #endif /* GATEWAY */
2321 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2322 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2323 1.250.6.2 dyoung CTLTYPE_INT, "hostzerobroadcast",
2324 1.250.6.2 dyoung SYSCTL_DESCR("All zeroes address is broadcast address"),
2325 1.250.6.2 dyoung NULL, 0, &hostzeroisbroadcast, 0,
2326 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2327 1.250.6.2 dyoung IPCTL_HOSTZEROBROADCAST, CTL_EOL);
2328 1.250.6.2 dyoung #if NGIF > 0
2329 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2330 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2331 1.250.6.2 dyoung CTLTYPE_INT, "gifttl",
2332 1.250.6.2 dyoung SYSCTL_DESCR("Default TTL for a gif tunnel datagram"),
2333 1.250.6.2 dyoung NULL, 0, &ip_gif_ttl, 0,
2334 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2335 1.250.6.2 dyoung IPCTL_GIF_TTL, CTL_EOL);
2336 1.250.6.2 dyoung #endif /* NGIF */
2337 1.250.6.2 dyoung #ifndef IPNOPRIVPORTS
2338 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2339 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2340 1.250.6.2 dyoung CTLTYPE_INT, "lowportmin",
2341 1.250.6.2 dyoung SYSCTL_DESCR("Lowest privileged ephemeral port number "
2342 1.250.6.2 dyoung "to assign"),
2343 1.250.6.2 dyoung sysctl_net_inet_ip_ports, 0, &lowportmin, 0,
2344 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2345 1.250.6.2 dyoung IPCTL_LOWPORTMIN, CTL_EOL);
2346 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2347 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2348 1.250.6.2 dyoung CTLTYPE_INT, "lowportmax",
2349 1.250.6.2 dyoung SYSCTL_DESCR("Highest privileged ephemeral port number "
2350 1.250.6.2 dyoung "to assign"),
2351 1.250.6.2 dyoung sysctl_net_inet_ip_ports, 0, &lowportmax, 0,
2352 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2353 1.250.6.2 dyoung IPCTL_LOWPORTMAX, CTL_EOL);
2354 1.250.6.2 dyoung #endif /* IPNOPRIVPORTS */
2355 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2356 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2357 1.250.6.2 dyoung CTLTYPE_INT, "maxfragpackets",
2358 1.250.6.2 dyoung SYSCTL_DESCR("Maximum number of fragments to retain for "
2359 1.250.6.2 dyoung "possible reassembly"),
2360 1.250.6.2 dyoung NULL, 0, &ip_maxfragpackets, 0,
2361 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2362 1.250.6.2 dyoung IPCTL_MAXFRAGPACKETS, CTL_EOL);
2363 1.250.6.2 dyoung #if NGRE > 0
2364 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2365 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2366 1.250.6.2 dyoung CTLTYPE_INT, "grettl",
2367 1.250.6.2 dyoung SYSCTL_DESCR("Default TTL for a gre tunnel datagram"),
2368 1.250.6.2 dyoung NULL, 0, &ip_gre_ttl, 0,
2369 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2370 1.250.6.2 dyoung IPCTL_GRE_TTL, CTL_EOL);
2371 1.250.6.2 dyoung #endif /* NGRE */
2372 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2373 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2374 1.250.6.2 dyoung CTLTYPE_INT, "checkinterface",
2375 1.250.6.2 dyoung SYSCTL_DESCR("Enable receive side of Strong ES model "
2376 1.250.6.2 dyoung "from RFC1122"),
2377 1.250.6.2 dyoung NULL, 0, &ip_checkinterface, 0,
2378 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2379 1.250.6.2 dyoung IPCTL_CHECKINTERFACE, CTL_EOL);
2380 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2381 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2382 1.250.6.2 dyoung CTLTYPE_INT, "random_id",
2383 1.250.6.2 dyoung SYSCTL_DESCR("Assign random ip_id values"),
2384 1.250.6.2 dyoung NULL, 0, &ip_do_randomid, 0,
2385 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2386 1.250.6.2 dyoung IPCTL_RANDOMID, CTL_EOL);
2387 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2388 1.250.6.2 dyoung CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2389 1.250.6.2 dyoung CTLTYPE_INT, "do_loopback_cksum",
2390 1.250.6.2 dyoung SYSCTL_DESCR("Perform IP checksum on loopback"),
2391 1.250.6.2 dyoung NULL, 0, &ip_do_loopback_cksum, 0,
2392 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP,
2393 1.250.6.2 dyoung IPCTL_LOOPBACKCKSUM, CTL_EOL);
2394 1.250.6.2 dyoung sysctl_createv(clog, 0, NULL, NULL,
2395 1.250.6.2 dyoung CTLFLAG_PERMANENT,
2396 1.250.6.2 dyoung CTLTYPE_STRUCT, "stats",
2397 1.250.6.2 dyoung SYSCTL_DESCR("IP statistics"),
2398 1.250.6.2 dyoung NULL, 0, &ipstat, sizeof(ipstat),
2399 1.250.6.2 dyoung CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS,
2400 1.250.6.2 dyoung CTL_EOL);
2401 1.250.6.2 dyoung }
2402