ip_input.c revision 1.254.4.1 1 1.254.4.1 mjf /* $NetBSD: ip_input.c,v 1.254.4.1 2007/11/19 00:49:10 mjf Exp $ */
2 1.89 itojun
3 1.89 itojun /*
4 1.89 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 1.89 itojun * All rights reserved.
6 1.152 itojun *
7 1.89 itojun * Redistribution and use in source and binary forms, with or without
8 1.89 itojun * modification, are permitted provided that the following conditions
9 1.89 itojun * are met:
10 1.89 itojun * 1. Redistributions of source code must retain the above copyright
11 1.89 itojun * notice, this list of conditions and the following disclaimer.
12 1.89 itojun * 2. Redistributions in binary form must reproduce the above copyright
13 1.89 itojun * notice, this list of conditions and the following disclaimer in the
14 1.89 itojun * documentation and/or other materials provided with the distribution.
15 1.89 itojun * 3. Neither the name of the project nor the names of its contributors
16 1.89 itojun * may be used to endorse or promote products derived from this software
17 1.89 itojun * without specific prior written permission.
18 1.152 itojun *
19 1.89 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 1.89 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.89 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.89 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 1.89 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.89 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.89 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.89 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.89 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.89 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.89 itojun * SUCH DAMAGE.
30 1.89 itojun */
31 1.76 thorpej
32 1.76 thorpej /*-
33 1.76 thorpej * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 1.76 thorpej * All rights reserved.
35 1.76 thorpej *
36 1.76 thorpej * This code is derived from software contributed to The NetBSD Foundation
37 1.76 thorpej * by Public Access Networks Corporation ("Panix"). It was developed under
38 1.76 thorpej * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 1.76 thorpej *
40 1.76 thorpej * Redistribution and use in source and binary forms, with or without
41 1.76 thorpej * modification, are permitted provided that the following conditions
42 1.76 thorpej * are met:
43 1.76 thorpej * 1. Redistributions of source code must retain the above copyright
44 1.76 thorpej * notice, this list of conditions and the following disclaimer.
45 1.76 thorpej * 2. Redistributions in binary form must reproduce the above copyright
46 1.76 thorpej * notice, this list of conditions and the following disclaimer in the
47 1.76 thorpej * documentation and/or other materials provided with the distribution.
48 1.76 thorpej * 3. All advertising materials mentioning features or use of this software
49 1.76 thorpej * must display the following acknowledgement:
50 1.76 thorpej * This product includes software developed by the NetBSD
51 1.76 thorpej * Foundation, Inc. and its contributors.
52 1.76 thorpej * 4. Neither the name of The NetBSD Foundation nor the names of its
53 1.76 thorpej * contributors may be used to endorse or promote products derived
54 1.76 thorpej * from this software without specific prior written permission.
55 1.76 thorpej *
56 1.76 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 1.76 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 1.76 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 1.76 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 1.76 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 1.76 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 1.76 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 1.76 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 1.76 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 1.76 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 1.76 thorpej * POSSIBILITY OF SUCH DAMAGE.
67 1.76 thorpej */
68 1.14 cgd
69 1.1 cgd /*
70 1.13 mycroft * Copyright (c) 1982, 1986, 1988, 1993
71 1.13 mycroft * The Regents of the University of California. All rights reserved.
72 1.1 cgd *
73 1.1 cgd * Redistribution and use in source and binary forms, with or without
74 1.1 cgd * modification, are permitted provided that the following conditions
75 1.1 cgd * are met:
76 1.1 cgd * 1. Redistributions of source code must retain the above copyright
77 1.1 cgd * notice, this list of conditions and the following disclaimer.
78 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
79 1.1 cgd * notice, this list of conditions and the following disclaimer in the
80 1.1 cgd * documentation and/or other materials provided with the distribution.
81 1.172 agc * 3. Neither the name of the University nor the names of its contributors
82 1.1 cgd * may be used to endorse or promote products derived from this software
83 1.1 cgd * without specific prior written permission.
84 1.1 cgd *
85 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
86 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
87 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
88 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
89 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
90 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
91 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
92 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
93 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
94 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
95 1.1 cgd * SUCH DAMAGE.
96 1.1 cgd *
97 1.14 cgd * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
98 1.1 cgd */
99 1.141 lukem
100 1.141 lukem #include <sys/cdefs.h>
101 1.254.4.1 mjf __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.254.4.1 2007/11/19 00:49:10 mjf Exp $");
102 1.55 scottr
103 1.184 jonathan #include "opt_inet.h"
104 1.62 matt #include "opt_gateway.h"
105 1.69 mrg #include "opt_pfil_hooks.h"
106 1.91 thorpej #include "opt_ipsec.h"
107 1.55 scottr #include "opt_mrouting.h"
108 1.167 martin #include "opt_mbuftrace.h"
109 1.135 thorpej #include "opt_inet_csum.h"
110 1.1 cgd
111 1.5 mycroft #include <sys/param.h>
112 1.5 mycroft #include <sys/systm.h>
113 1.5 mycroft #include <sys/malloc.h>
114 1.5 mycroft #include <sys/mbuf.h>
115 1.5 mycroft #include <sys/domain.h>
116 1.5 mycroft #include <sys/protosw.h>
117 1.5 mycroft #include <sys/socket.h>
118 1.44 thorpej #include <sys/socketvar.h>
119 1.5 mycroft #include <sys/errno.h>
120 1.5 mycroft #include <sys/time.h>
121 1.5 mycroft #include <sys/kernel.h>
122 1.72 thorpej #include <sys/pool.h>
123 1.28 christos #include <sys/sysctl.h>
124 1.230 elad #include <sys/kauth.h>
125 1.1 cgd
126 1.5 mycroft #include <net/if.h>
127 1.44 thorpej #include <net/if_dl.h>
128 1.5 mycroft #include <net/route.h>
129 1.45 mrg #include <net/pfil.h>
130 1.1 cgd
131 1.5 mycroft #include <netinet/in.h>
132 1.5 mycroft #include <netinet/in_systm.h>
133 1.5 mycroft #include <netinet/ip.h>
134 1.5 mycroft #include <netinet/in_pcb.h>
135 1.215 yamt #include <netinet/in_proto.h>
136 1.5 mycroft #include <netinet/in_var.h>
137 1.5 mycroft #include <netinet/ip_var.h>
138 1.5 mycroft #include <netinet/ip_icmp.h>
139 1.89 itojun /* just for gif_ttl */
140 1.89 itojun #include <netinet/in_gif.h>
141 1.89 itojun #include "gif.h"
142 1.144 martin #include <net/if_gre.h>
143 1.144 martin #include "gre.h"
144 1.111 jdolecek
145 1.111 jdolecek #ifdef MROUTING
146 1.111 jdolecek #include <netinet/ip_mroute.h>
147 1.111 jdolecek #endif
148 1.89 itojun
149 1.89 itojun #ifdef IPSEC
150 1.89 itojun #include <netinet6/ipsec.h>
151 1.89 itojun #include <netkey/key.h>
152 1.89 itojun #endif
153 1.173 jonathan #ifdef FAST_IPSEC
154 1.173 jonathan #include <netipsec/ipsec.h>
155 1.173 jonathan #include <netipsec/key.h>
156 1.173 jonathan #endif /* FAST_IPSEC*/
157 1.44 thorpej
/*
 * Compile-time defaults for the IP tunables.  Each default is applied
 * only when the corresponding macro has not already been defined.
 */
#if !defined(IPFORWARDING)
#if defined(GATEWAY)
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif /* GATEWAY */
#endif /* !IPFORWARDING */

#if !defined(IPSENDREDIRECTS)
#define	IPSENDREDIRECTS	1
#endif

#if !defined(IPFORWSRCRT)
#define	IPFORWSRCRT	1	/* forward source-routed packets */
#endif

#if !defined(IPALLOWSRCRT)
#define	IPALLOWSRCRT	1	/* allow source-routed packets */
#endif

#if !defined(IPMTUDISC)
#define	IPMTUDISC	1
#endif

#if !defined(IPMTUDISCTIMEOUT)
#define	IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
#endif

/*
 * Note: DIRECTED_BROADCAST is handled this way so that previous
 * configuration using this option will Just Work.
 */
#if !defined(IPDIRECTEDBCAST)
#if defined(DIRECTED_BROADCAST)
#define	IPDIRECTEDBCAST	1
#else
#define	IPDIRECTEDBCAST	0
#endif /* DIRECTED_BROADCAST */
#endif /* !IPDIRECTEDBCAST */
192 1.1 cgd int ipforwarding = IPFORWARDING;
193 1.1 cgd int ipsendredirects = IPSENDREDIRECTS;
194 1.13 mycroft int ip_defttl = IPDEFTTL;
195 1.26 thorpej int ip_forwsrcrt = IPFORWSRCRT;
196 1.27 thorpej int ip_directedbcast = IPDIRECTEDBCAST;
197 1.47 cjs int ip_allowsrcrt = IPALLOWSRCRT;
198 1.53 kml int ip_mtudisc = IPMTUDISC;
199 1.156 itojun int ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
200 1.1 cgd #ifdef DIAGNOSTIC
201 1.1 cgd int ipprintfs = 0;
202 1.1 cgd #endif
203 1.184 jonathan
204 1.184 jonathan int ip_do_randomid = 0;
205 1.184 jonathan
206 1.165 christos /*
207 1.165 christos * XXX - Setting ip_checkinterface mostly implements the receive side of
208 1.165 christos * the Strong ES model described in RFC 1122, but since the routing table
209 1.165 christos * and transmit implementation do not implement the Strong ES model,
210 1.165 christos * setting this to 1 results in an odd hybrid.
211 1.165 christos *
212 1.165 christos * XXX - ip_checkinterface currently must be disabled if you use ipnat
213 1.165 christos * to translate the destination address to another local interface.
214 1.165 christos *
215 1.165 christos * XXX - ip_checkinterface must be disabled if you add IP aliases
216 1.165 christos * to the loopback interface instead of the interface where the
217 1.165 christos * packets for those addresses are received.
218 1.165 christos */
219 1.165 christos int ip_checkinterface = 0;
220 1.165 christos
221 1.1 cgd
222 1.60 kml struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
223 1.60 kml
224 1.1 cgd int ipqmaxlen = IFQ_MAXLEN;
225 1.150 matt u_long in_ifaddrhash; /* size of hash table - 1 */
226 1.150 matt int in_ifaddrentries; /* total number of addrs */
227 1.212 perry struct in_ifaddrhead in_ifaddrhead;
228 1.57 tls struct in_ifaddrhashhead *in_ifaddrhashtbl;
229 1.166 matt u_long in_multihash; /* size of hash table - 1 */
230 1.166 matt int in_multientries; /* total number of addrs */
231 1.166 matt struct in_multihashhead *in_multihashtbl;
232 1.13 mycroft struct ifqueue ipintrq;
233 1.63 matt struct ipstat ipstat;
234 1.183 jonathan uint16_t ip_id;
235 1.75 thorpej
236 1.121 thorpej #ifdef PFIL_HOOKS
237 1.121 thorpej struct pfil_head inet_pfil_hook;
238 1.121 thorpej #endif
239 1.121 thorpej
/*
 * Cached copy of nmbclusters.  If nmbclusters no longer matches the
 * cached value, recalculate the IP parameters derived from it.
 */
static int	ip_nmbclusters;			/* copy of nmbclusters */
static void	ip_nmbclusters_changed(void);	/* recalc limits */

#define	CHECK_NMBCLUSTER_PARAMS()					\
do {									\
	if (__predict_false(ip_nmbclusters != nmbclusters)) {		\
		ip_nmbclusters_changed();				\
	}								\
} while (/*CONSTCOND*/ 0)
252 1.194 jonathan
/*
 * IP datagram reassembly queues (hashed).
 *
 * IPREASS_HASH() packs bits 0-3 and bits 8-11 of x into one byte,
 * XORs that with y, and masks the result down to a bucket index.
 */
#define	IPREASS_NHASH_LOG2	6
#define	IPREASS_NHASH		(1 << IPREASS_NHASH_LOG2)
#define	IPREASS_HMASK		(IPREASS_NHASH - 1)
#define	IPREASS_HASH(x, y)						\
	(((((x) & 0x0F) | (((x) >> 4) & 0xF0)) ^ (y)) & IPREASS_HMASK)
259 1.190 jonathan struct ipqhead ipq[IPREASS_NHASH];
260 1.75 thorpej int ipq_locked;
261 1.212 perry static int ip_nfragpackets; /* packets in reass queue */
262 1.194 jonathan static int ip_nfrags; /* total fragments in reass queues */
263 1.194 jonathan
264 1.194 jonathan int ip_maxfragpackets = 200; /* limit on packets. XXX sysctl */
265 1.194 jonathan int ip_maxfrags; /* limit on fragments. XXX sysctl */
266 1.194 jonathan
267 1.194 jonathan
268 1.194 jonathan /*
269 1.194 jonathan * Additive-Increase/Multiplicative-Decrease (AIMD) strategy for
270 1.194 jonathan * IP reassembly queue buffer management.
271 1.212 perry *
272 1.194 jonathan * We keep a count of total IP fragments (NB: not fragmented packets!)
273 1.194 jonathan * awaiting reassembly (ip_nfrags) and a limit (ip_maxfrags) on fragments.
274 1.194 jonathan * If ip_nfrags exceeds the ip_maxfrags limit, we drop half the
275 1.194 jonathan * total fragments in reassembly queues. This AIMD policy avoids
276 1.194 jonathan * repeatedly deleting single packets under heavy fragmentation load
277 1.194 jonathan * (e.g., from lossy NFS peers).
278 1.194 jonathan */
279 1.212 perry static u_int ip_reass_ttl_decr(u_int ticks);
280 1.210 perry static void ip_reass_drophalf(void);
281 1.194 jonathan
282 1.75 thorpej
283 1.223 perry static inline int ipq_lock_try(void);
284 1.223 perry static inline void ipq_unlock(void);
285 1.75 thorpej
286 1.223 perry static inline int
287 1.211 perry ipq_lock_try(void)
288 1.75 thorpej {
289 1.75 thorpej int s;
290 1.75 thorpej
291 1.132 thorpej /*
292 1.149 wiz * Use splvm() -- we're blocking things that would cause
293 1.132 thorpej * mbuf allocation.
294 1.132 thorpej */
295 1.132 thorpej s = splvm();
296 1.75 thorpej if (ipq_locked) {
297 1.75 thorpej splx(s);
298 1.75 thorpej return (0);
299 1.75 thorpej }
300 1.75 thorpej ipq_locked = 1;
301 1.75 thorpej splx(s);
302 1.75 thorpej return (1);
303 1.75 thorpej }
304 1.75 thorpej
305 1.223 perry static inline void
306 1.211 perry ipq_unlock(void)
307 1.75 thorpej {
308 1.75 thorpej int s;
309 1.75 thorpej
310 1.132 thorpej s = splvm();
311 1.75 thorpej ipq_locked = 0;
312 1.75 thorpej splx(s);
313 1.75 thorpej }
314 1.75 thorpej
/*
 * Reassembly queue locking macros.
 *
 * With DIAGNOSTIC, IPQ_LOCK() panics if the lock is already held and
 * IPQ_LOCK_CHECK() panics if it is not held.  Without DIAGNOSTIC,
 * IPQ_LOCK() discards the result of ipq_lock_try() and
 * IPQ_LOCK_CHECK() expands to nothing.
 */
#if defined(DIAGNOSTIC)
#define	IPQ_LOCK()							\
do {									\
	if (!ipq_lock_try()) {						\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)
#define	IPQ_LOCK_CHECK()						\
do {									\
	if (!ipq_locked) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)
#else /* !DIAGNOSTIC */
#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */
#endif /* DIAGNOSTIC */

#define	IPQ_UNLOCK()		ipq_unlock()
336 1.1 cgd
337 1.246 ad POOL_INIT(inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl", NULL,
338 1.246 ad IPL_SOFTNET);
339 1.246 ad POOL_INIT(ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl", NULL,
340 1.246 ad IPL_VM);
341 1.72 thorpej
/*
 * Event counters for IP header checksum handling.  They exist only
 * when INET_CSUM_COUNTERS is defined; otherwise
 * INET_CSUM_COUNTER_INCR() expands to nothing.
 */
#if defined(INET_CSUM_COUNTERS)
#include <sys/device.h>

struct evcnt ip_hwcsum_bad =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "hwcsum bad");
struct evcnt ip_hwcsum_ok =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "hwcsum ok");
struct evcnt ip_swcsum =
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "inet", "swcsum");

#define	INET_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++

EVCNT_ATTACH_STATIC(ip_hwcsum_bad);
EVCNT_ATTACH_STATIC(ip_hwcsum_ok);
EVCNT_ATTACH_STATIC(ip_swcsum);

#else /* !INET_CSUM_COUNTERS */

#define	INET_CSUM_COUNTER_INCR(ev)	/* nothing */

#endif /* INET_CSUM_COUNTERS */
363 1.135 thorpej
364 1.1 cgd /*
365 1.1 cgd * We need to save the IP options in case a protocol wants to respond
366 1.1 cgd * to an incoming packet over the same route if the packet got here
367 1.1 cgd * using IP source routing. This allows connection establishment and
368 1.1 cgd * maintenance when the remote end is on a network that is not known
369 1.1 cgd * to us.
370 1.1 cgd */
371 1.1 cgd int ip_nhops = 0;
372 1.1 cgd static struct ip_srcrt {
373 1.1 cgd struct in_addr dst; /* final destination */
374 1.1 cgd char nop; /* one NOP to align */
375 1.1 cgd char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
376 1.1 cgd struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
377 1.1 cgd } ip_srcrt;
378 1.1 cgd
379 1.210 perry static void save_rte(u_char *, struct in_addr);
380 1.35 mycroft
#if defined(MBUFTRACE)
/* mbuf owner tags; both are attached in ip_init(). */
struct mowner ip_rx_mowner = MOWNER_INIT("internet", "rx");
struct mowner ip_tx_mowner = MOWNER_INIT("internet", "tx");
#endif /* MBUFTRACE */
385 1.164 matt
386 1.1 cgd /*
387 1.194 jonathan * Compute IP limits derived from the value of nmbclusters.
388 1.194 jonathan */
389 1.194 jonathan static void
390 1.194 jonathan ip_nmbclusters_changed(void)
391 1.194 jonathan {
392 1.194 jonathan ip_maxfrags = nmbclusters / 4;
393 1.194 jonathan ip_nmbclusters = nmbclusters;
394 1.194 jonathan }
395 1.194 jonathan
396 1.194 jonathan /*
397 1.1 cgd * IP initialization: fill in IP protocol switch table.
398 1.1 cgd * All protocols not implemented in kernel go to raw IP protocol handler.
399 1.1 cgd */
400 1.8 mycroft void
401 1.211 perry ip_init(void)
402 1.1 cgd {
403 1.199 matt const struct protosw *pr;
404 1.109 augustss int i;
405 1.1 cgd
406 1.1 cgd pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
407 1.1 cgd if (pr == 0)
408 1.1 cgd panic("ip_init");
409 1.1 cgd for (i = 0; i < IPPROTO_MAX; i++)
410 1.1 cgd ip_protox[i] = pr - inetsw;
411 1.1 cgd for (pr = inetdomain.dom_protosw;
412 1.1 cgd pr < inetdomain.dom_protoswNPROTOSW; pr++)
413 1.1 cgd if (pr->pr_domain->dom_family == PF_INET &&
414 1.1 cgd pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
415 1.1 cgd ip_protox[pr->pr_protocol] = pr - inetsw;
416 1.192 jonathan
417 1.190 jonathan for (i = 0; i < IPREASS_NHASH; i++)
418 1.190 jonathan LIST_INIT(&ipq[i]);
419 1.190 jonathan
420 1.227 kardel ip_id = time_second & 0xfffff;
421 1.194 jonathan
422 1.1 cgd ipintrq.ifq_maxlen = ipqmaxlen;
423 1.194 jonathan ip_nmbclusters_changed();
424 1.194 jonathan
425 1.181 jonathan TAILQ_INIT(&in_ifaddrhead);
426 1.120 ad in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
427 1.120 ad M_WAITOK, &in_ifaddrhash);
428 1.166 matt in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
429 1.166 matt M_WAITOK, &in_multihash);
430 1.160 itojun ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
431 1.73 thorpej #ifdef GATEWAY
432 1.248 liamjfoy ipflow_init(ip_hashsize);
433 1.73 thorpej #endif
434 1.121 thorpej
435 1.121 thorpej #ifdef PFIL_HOOKS
436 1.121 thorpej /* Register our Packet Filter hook. */
437 1.126 thorpej inet_pfil_hook.ph_type = PFIL_TYPE_AF;
438 1.126 thorpej inet_pfil_hook.ph_af = AF_INET;
439 1.121 thorpej i = pfil_head_register(&inet_pfil_hook);
440 1.121 thorpej if (i != 0)
441 1.121 thorpej printf("ip_init: WARNING: unable to register pfil hook, "
442 1.121 thorpej "error %d\n", i);
443 1.121 thorpej #endif /* PFIL_HOOKS */
444 1.135 thorpej
445 1.164 matt #ifdef MBUFTRACE
446 1.164 matt MOWNER_ATTACH(&ip_tx_mowner);
447 1.164 matt MOWNER_ATTACH(&ip_rx_mowner);
448 1.164 matt #endif /* MBUFTRACE */
449 1.1 cgd }
450 1.1 cgd
451 1.229 christos struct sockaddr_in ipaddr = {
452 1.229 christos .sin_len = sizeof(ipaddr),
453 1.229 christos .sin_family = AF_INET,
454 1.229 christos };
455 1.1 cgd struct route ipforward_rt;
456 1.1 cgd
457 1.1 cgd /*
458 1.89 itojun * IP software interrupt routine
459 1.89 itojun */
460 1.89 itojun void
461 1.211 perry ipintr(void)
462 1.89 itojun {
463 1.89 itojun int s;
464 1.89 itojun struct mbuf *m;
465 1.89 itojun
466 1.241 ad while (!IF_IS_EMPTY(&ipintrq)) {
467 1.132 thorpej s = splnet();
468 1.89 itojun IF_DEQUEUE(&ipintrq, m);
469 1.89 itojun splx(s);
470 1.89 itojun if (m == 0)
471 1.89 itojun return;
472 1.89 itojun ip_input(m);
473 1.89 itojun }
474 1.89 itojun }
475 1.89 itojun
476 1.89 itojun /*
477 1.1 cgd * Ip input routine. Checksum and byte swap header. If fragmented
478 1.1 cgd * try to reassemble. Process options. Pass to next level.
479 1.1 cgd */
480 1.8 mycroft void
481 1.89 itojun ip_input(struct mbuf *m)
482 1.1 cgd {
483 1.109 augustss struct ip *ip = NULL;
484 1.109 augustss struct ipq *fp;
485 1.109 augustss struct in_ifaddr *ia;
486 1.109 augustss struct ifaddr *ifa;
487 1.25 cgd struct ipqent *ipqe;
488 1.89 itojun int hlen = 0, mff, len;
489 1.100 itojun int downmatch;
490 1.165 christos int checkif;
491 1.169 itojun int srcrt = 0;
492 1.233 tls int s;
493 1.190 jonathan u_int hash;
494 1.173 jonathan #ifdef FAST_IPSEC
495 1.173 jonathan struct m_tag *mtag;
496 1.173 jonathan struct tdb_ident *tdbi;
497 1.173 jonathan struct secpolicy *sp;
498 1.233 tls int error;
499 1.173 jonathan #endif /* FAST_IPSEC */
500 1.1 cgd
501 1.164 matt MCLAIM(m, &ip_rx_mowner);
502 1.1 cgd #ifdef DIAGNOSTIC
503 1.1 cgd if ((m->m_flags & M_PKTHDR) == 0)
504 1.1 cgd panic("ipintr no HDR");
505 1.89 itojun #endif
506 1.164 matt
507 1.1 cgd /*
508 1.1 cgd * If no IP addresses have been set yet but the interfaces
509 1.1 cgd * are receiving, can't do anything with incoming packets yet.
510 1.1 cgd */
511 1.181 jonathan if (TAILQ_FIRST(&in_ifaddrhead) == 0)
512 1.1 cgd goto bad;
513 1.1 cgd ipstat.ips_total++;
514 1.154 thorpej /*
515 1.154 thorpej * If the IP header is not aligned, slurp it up into a new
516 1.154 thorpej * mbuf with space for link headers, in the event we forward
517 1.154 thorpej * it. Otherwise, if it is aligned, make sure the entire
518 1.154 thorpej * base IP header is in the first mbuf of the chain.
519 1.154 thorpej */
520 1.244 christos if (IP_HDR_ALIGNED_P(mtod(m, void *)) == 0) {
521 1.154 thorpej if ((m = m_copyup(m, sizeof(struct ip),
522 1.154 thorpej (max_linkhdr + 3) & ~3)) == NULL) {
523 1.154 thorpej /* XXXJRT new stat, please */
524 1.154 thorpej ipstat.ips_toosmall++;
525 1.154 thorpej return;
526 1.154 thorpej }
527 1.154 thorpej } else if (__predict_false(m->m_len < sizeof (struct ip))) {
528 1.154 thorpej if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
529 1.154 thorpej ipstat.ips_toosmall++;
530 1.154 thorpej return;
531 1.154 thorpej }
532 1.1 cgd }
533 1.1 cgd ip = mtod(m, struct ip *);
534 1.13 mycroft if (ip->ip_v != IPVERSION) {
535 1.13 mycroft ipstat.ips_badvers++;
536 1.13 mycroft goto bad;
537 1.13 mycroft }
538 1.1 cgd hlen = ip->ip_hl << 2;
539 1.1 cgd if (hlen < sizeof(struct ip)) { /* minimum header length */
540 1.1 cgd ipstat.ips_badhlen++;
541 1.1 cgd goto bad;
542 1.1 cgd }
543 1.1 cgd if (hlen > m->m_len) {
544 1.1 cgd if ((m = m_pullup(m, hlen)) == 0) {
545 1.1 cgd ipstat.ips_badhlen++;
546 1.89 itojun return;
547 1.1 cgd }
548 1.1 cgd ip = mtod(m, struct ip *);
549 1.1 cgd }
550 1.98 thorpej
551 1.85 hwr /*
552 1.99 thorpej * RFC1122: packets with a multicast source address are
553 1.98 thorpej * not allowed.
554 1.85 hwr */
555 1.85 hwr if (IN_MULTICAST(ip->ip_src.s_addr)) {
556 1.130 itojun ipstat.ips_badaddr++;
557 1.85 hwr goto bad;
558 1.129 itojun }
559 1.129 itojun
560 1.129 itojun /* 127/8 must not appear on wire - RFC1122 */
561 1.129 itojun if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
562 1.129 itojun (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
563 1.130 itojun if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
564 1.130 itojun ipstat.ips_badaddr++;
565 1.129 itojun goto bad;
566 1.130 itojun }
567 1.85 hwr }
568 1.85 hwr
569 1.135 thorpej switch (m->m_pkthdr.csum_flags &
570 1.137 thorpej ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
571 1.135 thorpej M_CSUM_IPv4_BAD)) {
572 1.135 thorpej case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
573 1.135 thorpej INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
574 1.135 thorpej goto badcsum;
575 1.135 thorpej
576 1.135 thorpej case M_CSUM_IPv4:
577 1.135 thorpej /* Checksum was okay. */
578 1.135 thorpej INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
579 1.135 thorpej break;
580 1.135 thorpej
581 1.135 thorpej default:
582 1.206 thorpej /*
583 1.206 thorpej * Must compute it ourselves. Maybe skip checksum on
584 1.206 thorpej * loopback interfaces.
585 1.206 thorpej */
586 1.206 thorpej if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
587 1.206 thorpej IFF_LOOPBACK) || ip_do_loopback_cksum)) {
588 1.206 thorpej INET_CSUM_COUNTER_INCR(&ip_swcsum);
589 1.206 thorpej if (in_cksum(m, hlen) != 0)
590 1.206 thorpej goto badcsum;
591 1.206 thorpej }
592 1.135 thorpej break;
593 1.1 cgd }
594 1.1 cgd
595 1.121 thorpej /* Retrieve the packet length. */
596 1.121 thorpej len = ntohs(ip->ip_len);
597 1.81 proff
598 1.81 proff /*
599 1.81 proff * Check for additional length bogosity
600 1.81 proff */
601 1.84 proff if (len < hlen) {
602 1.81 proff ipstat.ips_badlen++;
603 1.81 proff goto bad;
604 1.81 proff }
605 1.1 cgd
606 1.1 cgd /*
607 1.1 cgd * Check that the amount of data in the buffers
608 1.1 cgd * is as at least much as the IP header would have us expect.
609 1.1 cgd * Trim mbufs if longer than we expect.
610 1.1 cgd * Drop packet if shorter than we expect.
611 1.1 cgd */
612 1.35 mycroft if (m->m_pkthdr.len < len) {
613 1.1 cgd ipstat.ips_tooshort++;
614 1.1 cgd goto bad;
615 1.1 cgd }
616 1.35 mycroft if (m->m_pkthdr.len > len) {
617 1.1 cgd if (m->m_len == m->m_pkthdr.len) {
618 1.35 mycroft m->m_len = len;
619 1.35 mycroft m->m_pkthdr.len = len;
620 1.1 cgd } else
621 1.35 mycroft m_adj(m, len - m->m_pkthdr.len);
622 1.1 cgd }
623 1.1 cgd
624 1.193 scw #if defined(IPSEC)
625 1.149 wiz /* ipflow (IP fast forwarding) is not compatible with IPsec. */
626 1.94 itojun m->m_flags &= ~M_CANFASTFWD;
627 1.94 itojun #else
628 1.64 thorpej /*
629 1.64 thorpej * Assume that we can create a fast-forward IP flow entry
630 1.64 thorpej * based on this packet.
631 1.64 thorpej */
632 1.64 thorpej m->m_flags |= M_CANFASTFWD;
633 1.94 itojun #endif
634 1.64 thorpej
635 1.36 mrg #ifdef PFIL_HOOKS
636 1.33 mrg /*
637 1.64 thorpej * Run through list of hooks for input packets. If there are any
638 1.64 thorpej * filters which require that additional packets in the flow are
639 1.64 thorpej * not fast-forwarded, they must clear the M_CANFASTFWD flag.
640 1.64 thorpej * Note that filters must _never_ set this flag, as another filter
641 1.64 thorpej * in the list may have previously cleared it.
642 1.33 mrg */
643 1.127 itojun /*
644 1.127 itojun * let ipfilter look at packet on the wire,
645 1.127 itojun * not the decapsulated packet.
646 1.127 itojun */
647 1.127 itojun #ifdef IPSEC
648 1.136 itojun if (!ipsec_getnhist(m))
649 1.186 scw #elif defined(FAST_IPSEC)
650 1.186 scw if (!ipsec_indone(m))
651 1.127 itojun #else
652 1.127 itojun if (1)
653 1.127 itojun #endif
654 1.127 itojun {
655 1.169 itojun struct in_addr odst;
656 1.169 itojun
657 1.169 itojun odst = ip->ip_dst;
658 1.127 itojun if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
659 1.168 itojun PFIL_IN) != 0)
660 1.168 itojun return;
661 1.127 itojun if (m == NULL)
662 1.127 itojun return;
663 1.127 itojun ip = mtod(m, struct ip *);
664 1.142 darrenr hlen = ip->ip_hl << 2;
665 1.205 darrenr /*
666 1.205 darrenr * XXX The setting of "srcrt" here is to prevent ip_forward()
667 1.205 darrenr * from generating ICMP redirects for packets that have
668 1.205 darrenr * been redirected by a hook back out on to the same LAN that
669 1.205 darrenr * they came from and is not an indication that the packet
670 1.205 darrenr * is being inffluenced by source routing options. This
671 1.205 darrenr * allows things like
672 1.205 darrenr * "rdr tlp0 0/0 port 80 -> 1.1.1.200 3128 tcp"
673 1.205 darrenr * where tlp0 is both on the 1.1.1.0/24 network and is the
674 1.205 darrenr * default route for hosts on 1.1.1.0/24. Of course this
675 1.205 darrenr * also requires a "map tlp0 ..." to complete the story.
676 1.205 darrenr * One might argue whether or not this kind of network config.
677 1.212 perry * should be supported in this manner...
678 1.205 darrenr */
679 1.169 itojun srcrt = (odst.s_addr != ip->ip_dst.s_addr);
680 1.127 itojun }
681 1.36 mrg #endif /* PFIL_HOOKS */
682 1.123 thorpej
683 1.123 thorpej #ifdef ALTQ
684 1.123 thorpej /* XXX Temporary until ALTQ is changed to use a pfil hook */
685 1.123 thorpej if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
686 1.123 thorpej /* packet dropped by traffic conditioner */
687 1.123 thorpej return;
688 1.123 thorpej }
689 1.123 thorpej #endif
690 1.121 thorpej
691 1.121 thorpej /*
692 1.1 cgd * Process options and, if not destined for us,
693 1.1 cgd * ship it on. ip_dooptions returns 1 when an
694 1.1 cgd * error was detected (causing an icmp message
695 1.1 cgd * to be sent and the original packet to be freed).
696 1.1 cgd */
697 1.1 cgd ip_nhops = 0; /* for source routed packets */
698 1.1 cgd if (hlen > sizeof (struct ip) && ip_dooptions(m))
699 1.89 itojun return;
700 1.1 cgd
701 1.1 cgd /*
702 1.165 christos * Enable a consistency check between the destination address
703 1.165 christos * and the arrival interface for a unicast packet (the RFC 1122
704 1.165 christos * strong ES model) if IP forwarding is disabled and the packet
705 1.165 christos * is not locally generated.
706 1.165 christos *
707 1.165 christos * XXX - Checking also should be disabled if the destination
708 1.165 christos * address is ipnat'ed to a different interface.
709 1.165 christos *
710 1.165 christos * XXX - Checking is incompatible with IP aliases added
711 1.165 christos * to the loopback interface instead of the interface where
712 1.165 christos * the packets are received.
713 1.165 christos *
714 1.165 christos * XXX - We need to add a per ifaddr flag for this so that
715 1.165 christos * we get finer grain control.
716 1.165 christos */
717 1.165 christos checkif = ip_checkinterface && (ipforwarding == 0) &&
718 1.165 christos (m->m_pkthdr.rcvif != NULL) &&
719 1.165 christos ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0);
720 1.165 christos
721 1.165 christos /*
722 1.1 cgd * Check our list of addresses, to see if the packet is for us.
723 1.100 itojun *
724 1.100 itojun * Traditional 4.4BSD did not consult IFF_UP at all.
725 1.100 itojun * The behavior here is to treat addresses on !IFF_UP interface
726 1.100 itojun * as not mine.
727 1.1 cgd */
728 1.100 itojun downmatch = 0;
729 1.140 matt LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
730 1.97 itojun if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
731 1.165 christos if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif)
732 1.165 christos continue;
733 1.97 itojun if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
734 1.97 itojun break;
735 1.100 itojun else
736 1.100 itojun downmatch++;
737 1.97 itojun }
738 1.97 itojun }
739 1.86 thorpej if (ia != NULL)
740 1.86 thorpej goto ours;
741 1.225 christos if (m->m_pkthdr.rcvif && m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
742 1.209 matt IFADDR_FOREACH(ifa, m->m_pkthdr.rcvif) {
743 1.140 matt if (ifa->ifa_addr->sa_family != AF_INET)
744 1.140 matt continue;
745 1.57 tls ia = ifatoia(ifa);
746 1.35 mycroft if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
747 1.35 mycroft in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
748 1.20 mycroft /*
749 1.20 mycroft * Look for all-0's host part (old broadcast addr),
750 1.20 mycroft * either for subnet or net.
751 1.20 mycroft */
752 1.20 mycroft ip->ip_dst.s_addr == ia->ia_subnet ||
753 1.18 mycroft ip->ip_dst.s_addr == ia->ia_net)
754 1.1 cgd goto ours;
755 1.57 tls /*
756 1.57 tls * An interface with IP address zero accepts
757 1.57 tls * all packets that arrive on that interface.
758 1.57 tls */
759 1.57 tls if (in_nullhost(ia->ia_addr.sin_addr))
760 1.57 tls goto ours;
761 1.1 cgd }
762 1.1 cgd }
763 1.18 mycroft if (IN_MULTICAST(ip->ip_dst.s_addr)) {
764 1.4 hpeyerl struct in_multi *inm;
765 1.4 hpeyerl #ifdef MROUTING
766 1.4 hpeyerl extern struct socket *ip_mrouter;
767 1.10 brezak
768 1.4 hpeyerl if (ip_mrouter) {
769 1.4 hpeyerl /*
770 1.4 hpeyerl * If we are acting as a multicast router, all
771 1.4 hpeyerl * incoming multicast packets are passed to the
772 1.4 hpeyerl * kernel-level multicast forwarding function.
773 1.4 hpeyerl * The packet is returned (relatively) intact; if
774 1.4 hpeyerl * ip_mforward() returns a non-zero value, the packet
775 1.4 hpeyerl * must be discarded, else it may be accepted below.
776 1.4 hpeyerl *
777 1.4 hpeyerl * (The IP ident field is put in the same byte order
778 1.4 hpeyerl * as expected when ip_mforward() is called from
779 1.4 hpeyerl * ip_output().)
780 1.4 hpeyerl */
781 1.13 mycroft if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
782 1.13 mycroft ipstat.ips_cantforward++;
783 1.4 hpeyerl m_freem(m);
784 1.89 itojun return;
785 1.4 hpeyerl }
786 1.4 hpeyerl
787 1.4 hpeyerl /*
788 1.4 hpeyerl * The process-level routing demon needs to receive
789 1.4 hpeyerl * all multicast IGMP packets, whether or not this
790 1.4 hpeyerl * host belongs to their destination groups.
791 1.4 hpeyerl */
792 1.4 hpeyerl if (ip->ip_p == IPPROTO_IGMP)
793 1.4 hpeyerl goto ours;
794 1.13 mycroft ipstat.ips_forward++;
795 1.4 hpeyerl }
796 1.4 hpeyerl #endif
797 1.4 hpeyerl /*
798 1.4 hpeyerl * See if we belong to the destination multicast group on the
799 1.4 hpeyerl * arrival interface.
800 1.4 hpeyerl */
801 1.4 hpeyerl IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
802 1.4 hpeyerl if (inm == NULL) {
803 1.13 mycroft ipstat.ips_cantforward++;
804 1.4 hpeyerl m_freem(m);
805 1.89 itojun return;
806 1.4 hpeyerl }
807 1.4 hpeyerl goto ours;
808 1.4 hpeyerl }
809 1.19 mycroft if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
810 1.35 mycroft in_nullhost(ip->ip_dst))
811 1.1 cgd goto ours;
812 1.1 cgd
813 1.1 cgd /*
814 1.1 cgd * Not for us; forward if possible and desirable.
815 1.1 cgd */
816 1.1 cgd if (ipforwarding == 0) {
817 1.1 cgd ipstat.ips_cantforward++;
818 1.1 cgd m_freem(m);
819 1.100 itojun } else {
820 1.100 itojun /*
821 1.100 itojun * If ip_dst matched any of my address on !IFF_UP interface,
822 1.100 itojun * and there's no IFF_UP interface that matches ip_dst,
823 1.100 itojun * send icmp unreach. Forwarding it will result in in-kernel
824 1.100 itojun * forwarding loop till TTL goes to 0.
825 1.100 itojun */
826 1.100 itojun if (downmatch) {
827 1.100 itojun icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
828 1.100 itojun ipstat.ips_cantforward++;
829 1.100 itojun return;
830 1.100 itojun }
831 1.145 itojun #ifdef IPSEC
832 1.145 itojun if (ipsec4_in_reject(m, NULL)) {
833 1.145 itojun ipsecstat.in_polvio++;
834 1.145 itojun goto bad;
835 1.145 itojun }
836 1.145 itojun #endif
837 1.173 jonathan #ifdef FAST_IPSEC
838 1.173 jonathan mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
839 1.173 jonathan s = splsoftnet();
840 1.173 jonathan if (mtag != NULL) {
841 1.173 jonathan tdbi = (struct tdb_ident *)(mtag + 1);
842 1.173 jonathan sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
843 1.173 jonathan } else {
844 1.173 jonathan sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
845 1.212 perry IP_FORWARDING, &error);
846 1.173 jonathan }
847 1.173 jonathan if (sp == NULL) { /* NB: can happen if error */
848 1.173 jonathan splx(s);
849 1.173 jonathan /*XXX error stat???*/
850 1.173 jonathan DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
851 1.173 jonathan goto bad;
852 1.173 jonathan }
853 1.173 jonathan
854 1.173 jonathan /*
855 1.173 jonathan * Check security policy against packet attributes.
856 1.173 jonathan */
857 1.173 jonathan error = ipsec_in_reject(sp, m);
858 1.173 jonathan KEY_FREESP(&sp);
859 1.173 jonathan splx(s);
860 1.173 jonathan if (error) {
861 1.173 jonathan ipstat.ips_cantforward++;
862 1.173 jonathan goto bad;
863 1.193 scw }
864 1.193 scw
865 1.193 scw /*
866 1.193 scw * Peek at the outbound SP for this packet to determine if
867 1.193 scw * it's a Fast Forward candidate.
868 1.193 scw */
869 1.193 scw mtag = m_tag_find(m, PACKET_TAG_IPSEC_PENDING_TDB, NULL);
870 1.193 scw if (mtag != NULL)
871 1.193 scw m->m_flags &= ~M_CANFASTFWD;
872 1.193 scw else {
873 1.193 scw s = splsoftnet();
874 1.193 scw sp = ipsec4_checkpolicy(m, IPSEC_DIR_OUTBOUND,
875 1.193 scw (IP_FORWARDING |
876 1.193 scw (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
877 1.193 scw &error, NULL);
878 1.193 scw if (sp != NULL) {
879 1.193 scw m->m_flags &= ~M_CANFASTFWD;
880 1.193 scw KEY_FREESP(&sp);
881 1.193 scw }
882 1.193 scw splx(s);
883 1.173 jonathan }
884 1.173 jonathan #endif /* FAST_IPSEC */
885 1.145 itojun
886 1.169 itojun ip_forward(m, srcrt);
887 1.100 itojun }
888 1.89 itojun return;
889 1.1 cgd
890 1.1 cgd ours:
891 1.1 cgd /*
892 1.1 cgd * If offset or IP_MF are set, must reassemble.
893 1.1 cgd * Otherwise, nothing need be done.
894 1.1 cgd * (We could look in the reassembly queue to see
895 1.1 cgd * if the packet was previously fragmented,
896 1.1 cgd * but it's not worth the time; just let them time out.)
897 1.1 cgd */
898 1.155 itojun if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
899 1.155 itojun
900 1.1 cgd /*
901 1.1 cgd * Look for queue of fragments
902 1.1 cgd * of this datagram.
903 1.1 cgd */
904 1.75 thorpej IPQ_LOCK();
905 1.190 jonathan hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
906 1.250 dyoung LIST_FOREACH(fp, &ipq[hash], ipq_q) {
907 1.1 cgd if (ip->ip_id == fp->ipq_id &&
908 1.35 mycroft in_hosteq(ip->ip_src, fp->ipq_src) &&
909 1.35 mycroft in_hosteq(ip->ip_dst, fp->ipq_dst) &&
910 1.1 cgd ip->ip_p == fp->ipq_p)
911 1.1 cgd goto found;
912 1.190 jonathan
913 1.190 jonathan }
914 1.1 cgd fp = 0;
915 1.1 cgd found:
916 1.1 cgd
917 1.1 cgd /*
918 1.1 cgd * Adjust ip_len to not reflect header,
919 1.25 cgd * set ipqe_mff if more fragments are expected,
920 1.1 cgd * convert offset of this to bytes.
921 1.1 cgd */
922 1.155 itojun ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
923 1.155 itojun mff = (ip->ip_off & htons(IP_MF)) != 0;
924 1.25 cgd if (mff) {
925 1.16 cgd /*
926 1.16 cgd * Make sure that fragments have a data length
927 1.16 cgd * that's a non-zero multiple of 8 bytes.
928 1.16 cgd */
929 1.155 itojun if (ntohs(ip->ip_len) == 0 ||
930 1.155 itojun (ntohs(ip->ip_len) & 0x7) != 0) {
931 1.16 cgd ipstat.ips_badfrags++;
932 1.75 thorpej IPQ_UNLOCK();
933 1.16 cgd goto bad;
934 1.16 cgd }
935 1.16 cgd }
936 1.155 itojun ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
937 1.1 cgd
938 1.1 cgd /*
939 1.1 cgd * If datagram marked as having more fragments
940 1.1 cgd * or if this is not the first fragment,
941 1.1 cgd * attempt reassembly; if it succeeds, proceed.
942 1.1 cgd */
943 1.155 itojun if (mff || ip->ip_off != htons(0)) {
944 1.1 cgd ipstat.ips_fragments++;
945 1.233 tls s = splvm();
946 1.72 thorpej ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
947 1.233 tls splx(s);
948 1.25 cgd if (ipqe == NULL) {
949 1.25 cgd ipstat.ips_rcvmemdrop++;
950 1.75 thorpej IPQ_UNLOCK();
951 1.25 cgd goto bad;
952 1.25 cgd }
953 1.25 cgd ipqe->ipqe_mff = mff;
954 1.50 thorpej ipqe->ipqe_m = m;
955 1.25 cgd ipqe->ipqe_ip = ip;
956 1.190 jonathan m = ip_reass(ipqe, fp, &ipq[hash]);
957 1.75 thorpej if (m == 0) {
958 1.75 thorpej IPQ_UNLOCK();
959 1.89 itojun return;
960 1.75 thorpej }
961 1.13 mycroft ipstat.ips_reassembled++;
962 1.50 thorpej ip = mtod(m, struct ip *);
963 1.74 thorpej hlen = ip->ip_hl << 2;
964 1.155 itojun ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
965 1.1 cgd } else
966 1.1 cgd if (fp)
967 1.1 cgd ip_freef(fp);
968 1.75 thorpej IPQ_UNLOCK();
969 1.79 mycroft }
970 1.128 itojun
971 1.173 jonathan #if defined(IPSEC)
972 1.128 itojun /*
973 1.128 itojun * enforce IPsec policy checking if we are seeing last header.
974 1.128 itojun * note that we do not visit this with protocols with pcb layer
975 1.128 itojun * code - like udp/tcp/raw ip.
976 1.128 itojun */
977 1.128 itojun if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
978 1.128 itojun ipsec4_in_reject(m, NULL)) {
979 1.128 itojun ipsecstat.in_polvio++;
980 1.128 itojun goto bad;
981 1.128 itojun }
982 1.128 itojun #endif
983 1.226 liamjfoy #ifdef FAST_IPSEC
984 1.173 jonathan /*
985 1.173 jonathan * enforce IPsec policy checking if we are seeing last header.
986 1.173 jonathan * note that we do not visit this with protocols with pcb layer
987 1.173 jonathan * code - like udp/tcp/raw ip.
988 1.173 jonathan */
989 1.173 jonathan if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
990 1.173 jonathan /*
991 1.173 jonathan * Check if the packet has already had IPsec processing
992 1.173 jonathan * done. If so, then just pass it along. This tag gets
993 1.173 jonathan * set during AH, ESP, etc. input handling, before the
994 1.173 jonathan * packet is returned to the ip input queue for delivery.
995 1.212 perry */
996 1.173 jonathan mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
997 1.173 jonathan s = splsoftnet();
998 1.173 jonathan if (mtag != NULL) {
999 1.173 jonathan tdbi = (struct tdb_ident *)(mtag + 1);
1000 1.173 jonathan sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
1001 1.173 jonathan } else {
1002 1.173 jonathan sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
1003 1.212 perry IP_FORWARDING, &error);
1004 1.173 jonathan }
1005 1.173 jonathan if (sp != NULL) {
1006 1.173 jonathan /*
1007 1.173 jonathan * Check security policy against packet attributes.
1008 1.173 jonathan */
1009 1.173 jonathan error = ipsec_in_reject(sp, m);
1010 1.173 jonathan KEY_FREESP(&sp);
1011 1.173 jonathan } else {
1012 1.173 jonathan /* XXX error stat??? */
1013 1.173 jonathan error = EINVAL;
1014 1.173 jonathan DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
1015 1.173 jonathan }
1016 1.173 jonathan splx(s);
1017 1.173 jonathan if (error)
1018 1.173 jonathan goto bad;
1019 1.173 jonathan }
1020 1.173 jonathan #endif /* FAST_IPSEC */
1021 1.1 cgd
1022 1.1 cgd /*
1023 1.1 cgd * Switch out to protocol's input routine.
1024 1.1 cgd */
1025 1.82 aidan #if IFA_STATS
1026 1.122 itojun if (ia && ip)
1027 1.155 itojun ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
1028 1.82 aidan #endif
1029 1.1 cgd ipstat.ips_delivered++;
1030 1.89 itojun {
1031 1.89 itojun int off = hlen, nh = ip->ip_p;
1032 1.89 itojun
1033 1.89 itojun (*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
1034 1.89 itojun return;
1035 1.89 itojun }
1036 1.1 cgd bad:
1037 1.1 cgd m_freem(m);
1038 1.135 thorpej return;
1039 1.135 thorpej
1040 1.135 thorpej badcsum:
1041 1.135 thorpej ipstat.ips_badsum++;
1042 1.135 thorpej m_freem(m);
1043 1.1 cgd }
1044 1.1 cgd
1045 1.1 cgd /*
1046 1.1 cgd * Take incoming datagram fragment and try to
1047 1.1 cgd * reassemble it into whole datagram. If a chain for
1048 1.1 cgd * reassembly of this datagram already exists, then it
1049 1.1 cgd * is given as fp; otherwise have to make a chain.
1050 1.1 cgd */
1051 1.50 thorpej struct mbuf *
1052 1.211 perry ip_reass(struct ipqent *ipqe, struct ipq *fp, struct ipqhead *ipqhead)
1053 1.1 cgd {
1054 1.109 augustss struct mbuf *m = ipqe->ipqe_m;
1055 1.109 augustss struct ipqent *nq, *p, *q;
1056 1.25 cgd struct ip *ip;
1057 1.1 cgd struct mbuf *t;
1058 1.25 cgd int hlen = ipqe->ipqe_ip->ip_hl << 2;
1059 1.233 tls int i, next, s;
1060 1.1 cgd
1061 1.75 thorpej IPQ_LOCK_CHECK();
1062 1.75 thorpej
1063 1.1 cgd /*
1064 1.1 cgd * Presence of header sizes in mbufs
1065 1.1 cgd * would confuse code below.
1066 1.1 cgd */
1067 1.1 cgd m->m_data += hlen;
1068 1.1 cgd m->m_len -= hlen;
1069 1.1 cgd
1070 1.194 jonathan #ifdef notyet
1071 1.194 jonathan /* make sure fragment limit is up-to-date */
1072 1.194 jonathan CHECK_NMBCLUSTER_PARAMS();
1073 1.194 jonathan
1074 1.194 jonathan /* If we have too many fragments, drop the older half. */
1075 1.194 jonathan if (ip_nfrags >= ip_maxfrags)
1076 1.194 jonathan ip_reass_drophalf(void);
1077 1.194 jonathan #endif
1078 1.194 jonathan
1079 1.1 cgd /*
1080 1.192 jonathan * We are about to add a fragment; increment frag count.
1081 1.192 jonathan */
1082 1.192 jonathan ip_nfrags++;
1083 1.212 perry
1084 1.192 jonathan /*
1085 1.1 cgd * If first fragment to arrive, create a reassembly queue.
1086 1.1 cgd */
1087 1.1 cgd if (fp == 0) {
1088 1.131 itojun /*
1089 1.131 itojun * Enforce upper bound on number of fragmented packets
1090 1.131 itojun * for which we attempt reassembly;
1091 1.131 itojun * If maxfrag is 0, never accept fragments.
1092 1.131 itojun * If maxfrag is -1, accept all fragments without limitation.
1093 1.131 itojun */
1094 1.131 itojun if (ip_maxfragpackets < 0)
1095 1.131 itojun ;
1096 1.131 itojun else if (ip_nfragpackets >= ip_maxfragpackets)
1097 1.131 itojun goto dropfrag;
1098 1.131 itojun ip_nfragpackets++;
1099 1.50 thorpej MALLOC(fp, struct ipq *, sizeof (struct ipq),
1100 1.50 thorpej M_FTABLE, M_NOWAIT);
1101 1.50 thorpej if (fp == NULL)
1102 1.1 cgd goto dropfrag;
1103 1.190 jonathan LIST_INSERT_HEAD(ipqhead, fp, ipq_q);
1104 1.192 jonathan fp->ipq_nfrags = 1;
1105 1.1 cgd fp->ipq_ttl = IPFRAGTTL;
1106 1.25 cgd fp->ipq_p = ipqe->ipqe_ip->ip_p;
1107 1.25 cgd fp->ipq_id = ipqe->ipqe_ip->ip_id;
1108 1.148 matt TAILQ_INIT(&fp->ipq_fragq);
1109 1.25 cgd fp->ipq_src = ipqe->ipqe_ip->ip_src;
1110 1.25 cgd fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
1111 1.25 cgd p = NULL;
1112 1.1 cgd goto insert;
1113 1.192 jonathan } else {
1114 1.192 jonathan fp->ipq_nfrags++;
1115 1.1 cgd }
1116 1.1 cgd
1117 1.1 cgd /*
1118 1.1 cgd * Find a segment which begins after this one does.
1119 1.1 cgd */
1120 1.148 matt for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1121 1.148 matt p = q, q = TAILQ_NEXT(q, ipqe_q))
1122 1.155 itojun if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
1123 1.1 cgd break;
1124 1.1 cgd
1125 1.1 cgd /*
1126 1.1 cgd * If there is a preceding segment, it may provide some of
1127 1.1 cgd * our data already. If so, drop the data from the incoming
1128 1.1 cgd * segment. If it provides all of our data, drop us.
1129 1.1 cgd */
1130 1.25 cgd if (p != NULL) {
1131 1.155 itojun i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
1132 1.155 itojun ntohs(ipqe->ipqe_ip->ip_off);
1133 1.1 cgd if (i > 0) {
1134 1.155 itojun if (i >= ntohs(ipqe->ipqe_ip->ip_len))
1135 1.1 cgd goto dropfrag;
1136 1.50 thorpej m_adj(ipqe->ipqe_m, i);
1137 1.155 itojun ipqe->ipqe_ip->ip_off =
1138 1.155 itojun htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
1139 1.155 itojun ipqe->ipqe_ip->ip_len =
1140 1.155 itojun htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
1141 1.1 cgd }
1142 1.1 cgd }
1143 1.1 cgd
1144 1.1 cgd /*
1145 1.1 cgd * While we overlap succeeding segments trim them or,
1146 1.1 cgd * if they are completely covered, dequeue them.
1147 1.1 cgd */
1148 1.155 itojun for (; q != NULL &&
1149 1.155 itojun ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
1150 1.155 itojun ntohs(q->ipqe_ip->ip_off); q = nq) {
1151 1.155 itojun i = (ntohs(ipqe->ipqe_ip->ip_off) +
1152 1.155 itojun ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
1153 1.155 itojun if (i < ntohs(q->ipqe_ip->ip_len)) {
1154 1.155 itojun q->ipqe_ip->ip_len =
1155 1.155 itojun htons(ntohs(q->ipqe_ip->ip_len) - i);
1156 1.155 itojun q->ipqe_ip->ip_off =
1157 1.155 itojun htons(ntohs(q->ipqe_ip->ip_off) + i);
1158 1.50 thorpej m_adj(q->ipqe_m, i);
1159 1.1 cgd break;
1160 1.1 cgd }
1161 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1162 1.50 thorpej m_freem(q->ipqe_m);
1163 1.148 matt TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1164 1.233 tls s = splvm();
1165 1.72 thorpej pool_put(&ipqent_pool, q);
1166 1.233 tls splx(s);
1167 1.192 jonathan fp->ipq_nfrags--;
1168 1.192 jonathan ip_nfrags--;
1169 1.1 cgd }
1170 1.1 cgd
1171 1.1 cgd insert:
1172 1.1 cgd /*
1173 1.1 cgd * Stick new segment in its place;
1174 1.1 cgd * check for complete reassembly.
1175 1.1 cgd */
1176 1.25 cgd if (p == NULL) {
1177 1.148 matt TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
1178 1.25 cgd } else {
1179 1.148 matt TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
1180 1.25 cgd }
1181 1.1 cgd next = 0;
1182 1.148 matt for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1183 1.148 matt p = q, q = TAILQ_NEXT(q, ipqe_q)) {
1184 1.155 itojun if (ntohs(q->ipqe_ip->ip_off) != next)
1185 1.1 cgd return (0);
1186 1.155 itojun next += ntohs(q->ipqe_ip->ip_len);
1187 1.1 cgd }
1188 1.25 cgd if (p->ipqe_mff)
1189 1.1 cgd return (0);
1190 1.1 cgd
1191 1.1 cgd /*
1192 1.41 thorpej * Reassembly is complete. Check for a bogus message size and
1193 1.41 thorpej * concatenate fragments.
1194 1.1 cgd */
1195 1.148 matt q = TAILQ_FIRST(&fp->ipq_fragq);
1196 1.25 cgd ip = q->ipqe_ip;
1197 1.41 thorpej if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
1198 1.41 thorpej ipstat.ips_toolong++;
1199 1.41 thorpej ip_freef(fp);
1200 1.41 thorpej return (0);
1201 1.41 thorpej }
1202 1.50 thorpej m = q->ipqe_m;
1203 1.1 cgd t = m->m_next;
1204 1.1 cgd m->m_next = 0;
1205 1.1 cgd m_cat(m, t);
1206 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1207 1.233 tls s = splvm();
1208 1.72 thorpej pool_put(&ipqent_pool, q);
1209 1.233 tls splx(s);
1210 1.25 cgd for (q = nq; q != NULL; q = nq) {
1211 1.50 thorpej t = q->ipqe_m;
1212 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1213 1.233 tls s = splvm();
1214 1.72 thorpej pool_put(&ipqent_pool, q);
1215 1.233 tls splx(s);
1216 1.1 cgd m_cat(m, t);
1217 1.1 cgd }
1218 1.192 jonathan ip_nfrags -= fp->ipq_nfrags;
1219 1.1 cgd
1220 1.1 cgd /*
1221 1.1 cgd * Create header for new ip packet by
1222 1.1 cgd * modifying header of first packet;
1223 1.1 cgd * dequeue and discard fragment reassembly header.
1224 1.1 cgd * Make header visible.
1225 1.1 cgd */
1226 1.155 itojun ip->ip_len = htons(next);
1227 1.25 cgd ip->ip_src = fp->ipq_src;
1228 1.25 cgd ip->ip_dst = fp->ipq_dst;
1229 1.25 cgd LIST_REMOVE(fp, ipq_q);
1230 1.50 thorpej FREE(fp, M_FTABLE);
1231 1.131 itojun ip_nfragpackets--;
1232 1.1 cgd m->m_len += (ip->ip_hl << 2);
1233 1.1 cgd m->m_data -= (ip->ip_hl << 2);
1234 1.1 cgd /* some debugging cruft by sklower, below, will go away soon */
1235 1.1 cgd if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
1236 1.109 augustss int plen = 0;
1237 1.50 thorpej for (t = m; t; t = t->m_next)
1238 1.50 thorpej plen += t->m_len;
1239 1.50 thorpej m->m_pkthdr.len = plen;
1240 1.213 yamt m->m_pkthdr.csum_flags = 0;
1241 1.1 cgd }
1242 1.50 thorpej return (m);
1243 1.1 cgd
1244 1.1 cgd dropfrag:
1245 1.192 jonathan if (fp != 0)
1246 1.192 jonathan fp->ipq_nfrags--;
1247 1.192 jonathan ip_nfrags--;
1248 1.1 cgd ipstat.ips_fragdropped++;
1249 1.1 cgd m_freem(m);
1250 1.233 tls s = splvm();
1251 1.72 thorpej pool_put(&ipqent_pool, ipqe);
1252 1.233 tls splx(s);
1253 1.1 cgd return (0);
1254 1.1 cgd }
1255 1.1 cgd
1256 1.1 cgd /*
1257 1.1 cgd * Free a fragment reassembly header and all
1258 1.1 cgd * associated datagrams.
1259 1.1 cgd */
1260 1.8 mycroft void
1261 1.211 perry ip_freef(struct ipq *fp)
1262 1.1 cgd {
1263 1.109 augustss struct ipqent *q, *p;
1264 1.192 jonathan u_int nfrags = 0;
1265 1.233 tls int s;
1266 1.1 cgd
1267 1.75 thorpej IPQ_LOCK_CHECK();
1268 1.75 thorpej
1269 1.148 matt for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
1270 1.148 matt p = TAILQ_NEXT(q, ipqe_q);
1271 1.50 thorpej m_freem(q->ipqe_m);
1272 1.192 jonathan nfrags++;
1273 1.148 matt TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1274 1.233 tls s = splvm();
1275 1.72 thorpej pool_put(&ipqent_pool, q);
1276 1.233 tls splx(s);
1277 1.1 cgd }
1278 1.192 jonathan
1279 1.192 jonathan if (nfrags != fp->ipq_nfrags)
1280 1.192 jonathan printf("ip_freef: nfrags %d != %d\n", fp->ipq_nfrags, nfrags);
1281 1.192 jonathan ip_nfrags -= nfrags;
1282 1.25 cgd LIST_REMOVE(fp, ipq_q);
1283 1.50 thorpej FREE(fp, M_FTABLE);
1284 1.131 itojun ip_nfragpackets--;
1285 1.1 cgd }
1286 1.1 cgd
1287 1.1 cgd /*
1288 1.194 jonathan * IP reassembly TTL machinery for multiplicative drop.
1289 1.194 jonathan */
1290 1.194 jonathan static u_int fragttl_histo[(IPFRAGTTL+1)];
1291 1.194 jonathan
1292 1.194 jonathan
1293 1.194 jonathan /*
1294 1.194 jonathan * Decrement TTL of all reasembly queue entries by `ticks'.
1295 1.194 jonathan * Count number of distinct fragments (as opposed to partial, fragmented
1296 1.194 jonathan * datagrams) in the reassembly queue. While we traverse the entire
1297 1.194 jonathan * reassembly queue, compute and return the median TTL over all fragments.
1298 1.194 jonathan */
1299 1.194 jonathan static u_int
1300 1.194 jonathan ip_reass_ttl_decr(u_int ticks)
1301 1.194 jonathan {
1302 1.198 matt u_int nfrags, median, dropfraction, keepfraction;
1303 1.194 jonathan struct ipq *fp, *nfp;
1304 1.198 matt int i;
1305 1.212 perry
1306 1.194 jonathan nfrags = 0;
1307 1.194 jonathan memset(fragttl_histo, 0, sizeof fragttl_histo);
1308 1.212 perry
1309 1.194 jonathan for (i = 0; i < IPREASS_NHASH; i++) {
1310 1.194 jonathan for (fp = LIST_FIRST(&ipq[i]); fp != NULL; fp = nfp) {
1311 1.194 jonathan fp->ipq_ttl = ((fp->ipq_ttl <= ticks) ?
1312 1.194 jonathan 0 : fp->ipq_ttl - ticks);
1313 1.194 jonathan nfp = LIST_NEXT(fp, ipq_q);
1314 1.194 jonathan if (fp->ipq_ttl == 0) {
1315 1.194 jonathan ipstat.ips_fragtimeout++;
1316 1.194 jonathan ip_freef(fp);
1317 1.194 jonathan } else {
1318 1.194 jonathan nfrags += fp->ipq_nfrags;
1319 1.194 jonathan fragttl_histo[fp->ipq_ttl] += fp->ipq_nfrags;
1320 1.194 jonathan }
1321 1.194 jonathan }
1322 1.194 jonathan }
1323 1.194 jonathan
1324 1.194 jonathan KASSERT(ip_nfrags == nfrags);
1325 1.194 jonathan
1326 1.194 jonathan /* Find median (or other drop fraction) in histogram. */
1327 1.194 jonathan dropfraction = (ip_nfrags / 2);
1328 1.194 jonathan keepfraction = ip_nfrags - dropfraction;
1329 1.194 jonathan for (i = IPFRAGTTL, median = 0; i >= 0; i--) {
1330 1.194 jonathan median += fragttl_histo[i];
1331 1.194 jonathan if (median >= keepfraction)
1332 1.194 jonathan break;
1333 1.194 jonathan }
1334 1.194 jonathan
1335 1.194 jonathan /* Return TTL of median (or other fraction). */
1336 1.194 jonathan return (u_int)i;
1337 1.194 jonathan }
1338 1.194 jonathan
1339 1.194 jonathan void
1340 1.194 jonathan ip_reass_drophalf(void)
1341 1.194 jonathan {
1342 1.194 jonathan
1343 1.194 jonathan u_int median_ticks;
1344 1.194 jonathan /*
1345 1.194 jonathan * Compute median TTL of all fragments, and count frags
1346 1.194 jonathan * with that TTL or lower (roughly half of all fragments).
1347 1.194 jonathan */
1348 1.194 jonathan median_ticks = ip_reass_ttl_decr(0);
1349 1.194 jonathan
1350 1.194 jonathan /* Drop half. */
1351 1.194 jonathan median_ticks = ip_reass_ttl_decr(median_ticks);
1352 1.194 jonathan
1353 1.194 jonathan }
1354 1.194 jonathan
1355 1.194 jonathan /*
1356 1.1 cgd * IP timer processing;
1357 1.1 cgd * if a timer expires on a reassembly
1358 1.1 cgd * queue, discard it.
1359 1.1 cgd */
1360 1.8 mycroft void
1361 1.211 perry ip_slowtimo(void)
1362 1.1 cgd {
1363 1.191 jonathan static u_int dropscanidx = 0;
1364 1.191 jonathan u_int i;
1365 1.194 jonathan u_int median_ttl;
1366 1.24 mycroft int s = splsoftnet();
1367 1.1 cgd
1368 1.75 thorpej IPQ_LOCK();
1369 1.194 jonathan
1370 1.194 jonathan /* Age TTL of all fragments by 1 tick .*/
1371 1.194 jonathan median_ttl = ip_reass_ttl_decr(1);
1372 1.194 jonathan
1373 1.194 jonathan /* make sure fragment limit is up-to-date */
1374 1.194 jonathan CHECK_NMBCLUSTER_PARAMS();
1375 1.194 jonathan
1376 1.194 jonathan /* If we have too many fragments, drop the older half. */
1377 1.194 jonathan if (ip_nfrags > ip_maxfrags)
1378 1.194 jonathan ip_reass_ttl_decr(median_ttl);
1379 1.194 jonathan
1380 1.131 itojun /*
1381 1.194 jonathan * If we are over the maximum number of fragmented packets
1382 1.131 itojun * (due to the limit being lowered), drain off
1383 1.190 jonathan * enough to get down to the new limit. Start draining
1384 1.190 jonathan * from the reassembly hashqueue most recently drained.
1385 1.131 itojun */
1386 1.131 itojun if (ip_maxfragpackets < 0)
1387 1.131 itojun ;
1388 1.131 itojun else {
1389 1.190 jonathan int wrapped = 0;
1390 1.190 jonathan
1391 1.190 jonathan i = dropscanidx;
1392 1.190 jonathan while (ip_nfragpackets > ip_maxfragpackets && wrapped == 0) {
1393 1.190 jonathan while (LIST_FIRST(&ipq[i]) != NULL)
1394 1.190 jonathan ip_freef(LIST_FIRST(&ipq[i]));
1395 1.190 jonathan if (++i >= IPREASS_NHASH) {
1396 1.190 jonathan i = 0;
1397 1.190 jonathan }
1398 1.190 jonathan /*
1399 1.190 jonathan * Dont scan forever even if fragment counters are
1400 1.190 jonathan * wrong: stop after scanning entire reassembly queue.
1401 1.190 jonathan */
1402 1.190 jonathan if (i == dropscanidx)
1403 1.190 jonathan wrapped = 1;
1404 1.190 jonathan }
1405 1.190 jonathan dropscanidx = i;
1406 1.131 itojun }
1407 1.75 thorpej IPQ_UNLOCK();
1408 1.1 cgd splx(s);
1409 1.1 cgd }
1410 1.1 cgd
/*
 * ip_drain --
 *	Drain off datagram fragments to give memory back.  Only half of
 *	the queued fragments are dropped per call; if more mbufs are
 *	needed, we will be called again soon.
 */
void
ip_drain(void)
{

	/*
	 * We may be called from a device's interrupt context, so never
	 * wait for the lock: only do the work when ipq_lock_try()
	 * succeeds (non-zero), and silently do nothing if the ipq is
	 * already busy.
	 */
	if (ipq_lock_try() != 0) {
		ip_reass_drophalf();
		IPQ_UNLOCK();
	}
}
1433 1.1 cgd
1434 1.1 cgd /*
1435 1.1 cgd * Do option processing on a datagram,
1436 1.1 cgd * possibly discarding it if bad options are encountered,
1437 1.1 cgd * or forwarding it if source-routed.
1438 1.1 cgd * Returns 1 if packet has been forwarded/freed,
1439 1.1 cgd * 0 if the packet should be processed further.
1440 1.1 cgd */
1441 1.8 mycroft int
1442 1.211 perry ip_dooptions(struct mbuf *m)
1443 1.1 cgd {
1444 1.109 augustss struct ip *ip = mtod(m, struct ip *);
1445 1.109 augustss u_char *cp, *cp0;
1446 1.109 augustss struct ip_timestamp *ipt;
1447 1.109 augustss struct in_ifaddr *ia;
1448 1.1 cgd int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1449 1.104 thorpej struct in_addr dst;
1450 1.1 cgd n_time ntime;
1451 1.1 cgd
1452 1.13 mycroft dst = ip->ip_dst;
1453 1.1 cgd cp = (u_char *)(ip + 1);
1454 1.1 cgd cnt = (ip->ip_hl << 2) - sizeof (struct ip);
1455 1.1 cgd for (; cnt > 0; cnt -= optlen, cp += optlen) {
1456 1.1 cgd opt = cp[IPOPT_OPTVAL];
1457 1.1 cgd if (opt == IPOPT_EOL)
1458 1.1 cgd break;
1459 1.1 cgd if (opt == IPOPT_NOP)
1460 1.1 cgd optlen = 1;
1461 1.1 cgd else {
1462 1.113 itojun if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1463 1.113 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1464 1.113 itojun goto bad;
1465 1.113 itojun }
1466 1.1 cgd optlen = cp[IPOPT_OLEN];
1467 1.114 itojun if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1468 1.1 cgd code = &cp[IPOPT_OLEN] - (u_char *)ip;
1469 1.1 cgd goto bad;
1470 1.1 cgd }
1471 1.1 cgd }
1472 1.1 cgd switch (opt) {
1473 1.1 cgd
1474 1.1 cgd default:
1475 1.1 cgd break;
1476 1.1 cgd
1477 1.1 cgd /*
1478 1.1 cgd * Source routing with record.
1479 1.1 cgd * Find interface with current destination address.
1480 1.1 cgd * If none on this machine then drop if strictly routed,
1481 1.1 cgd * or do nothing if loosely routed.
1482 1.1 cgd * Record interface address and bring up next address
1483 1.1 cgd * component. If strictly routed make sure next
1484 1.1 cgd * address is on directly accessible net.
1485 1.1 cgd */
1486 1.1 cgd case IPOPT_LSRR:
1487 1.1 cgd case IPOPT_SSRR:
1488 1.47 cjs if (ip_allowsrcrt == 0) {
1489 1.47 cjs type = ICMP_UNREACH;
1490 1.47 cjs code = ICMP_UNREACH_NET_PROHIB;
1491 1.47 cjs goto bad;
1492 1.47 cjs }
1493 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1494 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1495 1.114 itojun goto bad;
1496 1.114 itojun }
1497 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1498 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1499 1.1 cgd goto bad;
1500 1.1 cgd }
1501 1.1 cgd ipaddr.sin_addr = ip->ip_dst;
1502 1.19 mycroft ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1503 1.1 cgd if (ia == 0) {
1504 1.1 cgd if (opt == IPOPT_SSRR) {
1505 1.1 cgd type = ICMP_UNREACH;
1506 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1507 1.1 cgd goto bad;
1508 1.1 cgd }
1509 1.1 cgd /*
1510 1.1 cgd * Loose routing, and not at next destination
1511 1.1 cgd * yet; nothing to do except forward.
1512 1.1 cgd */
1513 1.1 cgd break;
1514 1.1 cgd }
1515 1.1 cgd off--; /* 0 origin */
1516 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen) {
1517 1.1 cgd /*
1518 1.1 cgd * End of source route. Should be for us.
1519 1.1 cgd */
1520 1.1 cgd save_rte(cp, ip->ip_src);
1521 1.1 cgd break;
1522 1.1 cgd }
1523 1.1 cgd /*
1524 1.1 cgd * locate outgoing interface
1525 1.1 cgd */
1526 1.244 christos bcopy((void *)(cp + off), (void *)&ipaddr.sin_addr,
1527 1.1 cgd sizeof(ipaddr.sin_addr));
1528 1.96 thorpej if (opt == IPOPT_SSRR)
1529 1.196 itojun ia = ifatoia(ifa_ifwithladdr(sintosa(&ipaddr)));
1530 1.96 thorpej else
1531 1.1 cgd ia = ip_rtaddr(ipaddr.sin_addr);
1532 1.1 cgd if (ia == 0) {
1533 1.1 cgd type = ICMP_UNREACH;
1534 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1535 1.1 cgd goto bad;
1536 1.1 cgd }
1537 1.1 cgd ip->ip_dst = ipaddr.sin_addr;
1538 1.244 christos bcopy((void *)&ia->ia_addr.sin_addr,
1539 1.244 christos (void *)(cp + off), sizeof(struct in_addr));
1540 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1541 1.13 mycroft /*
1542 1.13 mycroft * Let ip_intr's mcast routing check handle mcast pkts
1543 1.13 mycroft */
1544 1.18 mycroft forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1545 1.1 cgd break;
1546 1.1 cgd
1547 1.1 cgd case IPOPT_RR:
1548 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1549 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1550 1.114 itojun goto bad;
1551 1.114 itojun }
1552 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1553 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1554 1.1 cgd goto bad;
1555 1.1 cgd }
1556 1.1 cgd /*
1557 1.1 cgd * If no space remains, ignore.
1558 1.1 cgd */
1559 1.1 cgd off--; /* 0 origin */
1560 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen)
1561 1.1 cgd break;
1562 1.244 christos bcopy((void *)(&ip->ip_dst), (void *)&ipaddr.sin_addr,
1563 1.1 cgd sizeof(ipaddr.sin_addr));
1564 1.1 cgd /*
1565 1.1 cgd * locate outgoing interface; if we're the destination,
1566 1.1 cgd * use the incoming interface (should be same).
1567 1.1 cgd */
1568 1.96 thorpej if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
1569 1.96 thorpej == NULL &&
1570 1.96 thorpej (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
1571 1.1 cgd type = ICMP_UNREACH;
1572 1.1 cgd code = ICMP_UNREACH_HOST;
1573 1.1 cgd goto bad;
1574 1.1 cgd }
1575 1.244 christos bcopy((void *)&ia->ia_addr.sin_addr,
1576 1.244 christos (void *)(cp + off), sizeof(struct in_addr));
1577 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1578 1.1 cgd break;
1579 1.1 cgd
1580 1.1 cgd case IPOPT_TS:
1581 1.1 cgd code = cp - (u_char *)ip;
1582 1.1 cgd ipt = (struct ip_timestamp *)cp;
1583 1.114 itojun if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
1584 1.114 itojun code = (u_char *)&ipt->ipt_len - (u_char *)ip;
1585 1.1 cgd goto bad;
1586 1.114 itojun }
1587 1.114 itojun if (ipt->ipt_ptr < 5) {
1588 1.114 itojun code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
1589 1.114 itojun goto bad;
1590 1.114 itojun }
1591 1.15 cgd if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
1592 1.114 itojun if (++ipt->ipt_oflw == 0) {
1593 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1594 1.114 itojun (u_char *)ip;
1595 1.1 cgd goto bad;
1596 1.114 itojun }
1597 1.1 cgd break;
1598 1.1 cgd }
1599 1.104 thorpej cp0 = (cp + ipt->ipt_ptr - 1);
1600 1.1 cgd switch (ipt->ipt_flg) {
1601 1.1 cgd
1602 1.1 cgd case IPOPT_TS_TSONLY:
1603 1.1 cgd break;
1604 1.1 cgd
1605 1.1 cgd case IPOPT_TS_TSANDADDR:
1606 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1607 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1608 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1609 1.114 itojun (u_char *)ip;
1610 1.1 cgd goto bad;
1611 1.114 itojun }
1612 1.13 mycroft ipaddr.sin_addr = dst;
1613 1.96 thorpej ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
1614 1.96 thorpej m->m_pkthdr.rcvif));
1615 1.13 mycroft if (ia == 0)
1616 1.13 mycroft continue;
1617 1.104 thorpej bcopy(&ia->ia_addr.sin_addr,
1618 1.104 thorpej cp0, sizeof(struct in_addr));
1619 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1620 1.1 cgd break;
1621 1.1 cgd
1622 1.1 cgd case IPOPT_TS_PRESPEC:
1623 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1624 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1625 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1626 1.114 itojun (u_char *)ip;
1627 1.1 cgd goto bad;
1628 1.114 itojun }
1629 1.104 thorpej bcopy(cp0, &ipaddr.sin_addr,
1630 1.1 cgd sizeof(struct in_addr));
1631 1.96 thorpej if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
1632 1.96 thorpej == NULL)
1633 1.1 cgd continue;
1634 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1635 1.1 cgd break;
1636 1.1 cgd
1637 1.1 cgd default:
1638 1.114 itojun /* XXX can't take &ipt->ipt_flg */
1639 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1640 1.114 itojun (u_char *)ip + 1;
1641 1.1 cgd goto bad;
1642 1.1 cgd }
1643 1.1 cgd ntime = iptime();
1644 1.107 thorpej cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
1645 1.244 christos memmove((char *)cp + ipt->ipt_ptr - 1, cp0,
1646 1.1 cgd sizeof(n_time));
1647 1.1 cgd ipt->ipt_ptr += sizeof(n_time);
1648 1.1 cgd }
1649 1.1 cgd }
1650 1.1 cgd if (forward) {
1651 1.26 thorpej if (ip_forwsrcrt == 0) {
1652 1.26 thorpej type = ICMP_UNREACH;
1653 1.26 thorpej code = ICMP_UNREACH_SRCFAIL;
1654 1.26 thorpej goto bad;
1655 1.26 thorpej }
1656 1.1 cgd ip_forward(m, 1);
1657 1.1 cgd return (1);
1658 1.13 mycroft }
1659 1.13 mycroft return (0);
1660 1.1 cgd bad:
1661 1.13 mycroft icmp_error(m, type, code, 0, 0);
1662 1.13 mycroft ipstat.ips_badoptions++;
1663 1.1 cgd return (1);
1664 1.1 cgd }
1665 1.1 cgd
1666 1.1 cgd /*
1667 1.1 cgd * Given address of next destination (final or next hop),
1668 1.1 cgd * return internet address info of interface to be used to get there.
1669 1.1 cgd */
1670 1.1 cgd struct in_ifaddr *
1671 1.211 perry ip_rtaddr(struct in_addr dst)
1672 1.1 cgd {
1673 1.249 dyoung struct rtentry *rt;
1674 1.249 dyoung union {
1675 1.249 dyoung struct sockaddr dst;
1676 1.249 dyoung struct sockaddr_in dst4;
1677 1.249 dyoung } u;
1678 1.249 dyoung
1679 1.249 dyoung sockaddr_in_init(&u.dst4, &dst, 0);
1680 1.249 dyoung
1681 1.249 dyoung if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL)
1682 1.249 dyoung return NULL;
1683 1.249 dyoung
1684 1.249 dyoung return ifatoia(rt->rt_ifa);
1685 1.1 cgd }
1686 1.1 cgd
1687 1.1 cgd /*
1688 1.1 cgd * Save incoming source route for use in replies,
1689 1.1 cgd * to be picked up later by ip_srcroute if the receiver is interested.
1690 1.1 cgd */
1691 1.13 mycroft void
1692 1.211 perry save_rte(u_char *option, struct in_addr dst)
1693 1.1 cgd {
1694 1.1 cgd unsigned olen;
1695 1.1 cgd
1696 1.1 cgd olen = option[IPOPT_OLEN];
1697 1.1 cgd #ifdef DIAGNOSTIC
1698 1.1 cgd if (ipprintfs)
1699 1.39 christos printf("save_rte: olen %d\n", olen);
1700 1.89 itojun #endif /* 0 */
1701 1.1 cgd if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1702 1.1 cgd return;
1703 1.244 christos bcopy((void *)option, (void *)ip_srcrt.srcopt, olen);
1704 1.1 cgd ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1705 1.1 cgd ip_srcrt.dst = dst;
1706 1.1 cgd }
1707 1.1 cgd
1708 1.1 cgd /*
1709 1.1 cgd * Retrieve incoming source route for use in replies,
1710 1.1 cgd * in the same form used by setsockopt.
1711 1.1 cgd * The first hop is placed before the options, will be removed later.
1712 1.1 cgd */
1713 1.1 cgd struct mbuf *
1714 1.211 perry ip_srcroute(void)
1715 1.1 cgd {
1716 1.109 augustss struct in_addr *p, *q;
1717 1.109 augustss struct mbuf *m;
1718 1.1 cgd
1719 1.1 cgd if (ip_nhops == 0)
1720 1.237 dyoung return NULL;
1721 1.1 cgd m = m_get(M_DONTWAIT, MT_SOOPTS);
1722 1.1 cgd if (m == 0)
1723 1.237 dyoung return NULL;
1724 1.1 cgd
1725 1.164 matt MCLAIM(m, &inetdomain.dom_mowner);
1726 1.13 mycroft #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1727 1.1 cgd
1728 1.1 cgd /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1729 1.1 cgd m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1730 1.1 cgd OPTSIZ;
1731 1.1 cgd #ifdef DIAGNOSTIC
1732 1.1 cgd if (ipprintfs)
1733 1.39 christos printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1734 1.1 cgd #endif
1735 1.1 cgd
1736 1.1 cgd /*
1737 1.1 cgd * First save first hop for return route
1738 1.1 cgd */
1739 1.1 cgd p = &ip_srcrt.route[ip_nhops - 1];
1740 1.1 cgd *(mtod(m, struct in_addr *)) = *p--;
1741 1.1 cgd #ifdef DIAGNOSTIC
1742 1.1 cgd if (ipprintfs)
1743 1.39 christos printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
1744 1.1 cgd #endif
1745 1.1 cgd
1746 1.1 cgd /*
1747 1.1 cgd * Copy option fields and padding (nop) to mbuf.
1748 1.1 cgd */
1749 1.1 cgd ip_srcrt.nop = IPOPT_NOP;
1750 1.1 cgd ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1751 1.244 christos memmove(mtod(m, char *) + sizeof(struct in_addr), &ip_srcrt.nop,
1752 1.244 christos OPTSIZ);
1753 1.244 christos q = (struct in_addr *)(mtod(m, char *) +
1754 1.1 cgd sizeof(struct in_addr) + OPTSIZ);
1755 1.1 cgd #undef OPTSIZ
1756 1.1 cgd /*
1757 1.1 cgd * Record return path as an IP source route,
1758 1.1 cgd * reversing the path (pointers are now aligned).
1759 1.1 cgd */
1760 1.1 cgd while (p >= ip_srcrt.route) {
1761 1.1 cgd #ifdef DIAGNOSTIC
1762 1.1 cgd if (ipprintfs)
1763 1.39 christos printf(" %x", ntohl(q->s_addr));
1764 1.1 cgd #endif
1765 1.1 cgd *q++ = *p--;
1766 1.1 cgd }
1767 1.1 cgd /*
1768 1.1 cgd * Last hop goes to final destination.
1769 1.1 cgd */
1770 1.1 cgd *q = ip_srcrt.dst;
1771 1.1 cgd #ifdef DIAGNOSTIC
1772 1.1 cgd if (ipprintfs)
1773 1.39 christos printf(" %x\n", ntohl(q->s_addr));
1774 1.1 cgd #endif
1775 1.1 cgd return (m);
1776 1.1 cgd }
1777 1.1 cgd
1778 1.139 matt const int inetctlerrmap[PRC_NCMDS] = {
1779 1.1 cgd 0, 0, 0, 0,
1780 1.1 cgd 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1781 1.1 cgd EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1782 1.1 cgd EMSGSIZE, EHOSTUNREACH, 0, 0,
1783 1.1 cgd 0, 0, 0, 0,
1784 1.1 cgd ENOPROTOOPT
1785 1.1 cgd };
1786 1.1 cgd
1787 1.1 cgd /*
1788 1.1 cgd * Forward a packet. If some error occurs return the sender
1789 1.1 cgd * an icmp packet. Note we can't always generate a meaningful
1790 1.1 cgd * icmp message because icmp doesn't have a large enough repertoire
1791 1.1 cgd * of codes and types.
1792 1.1 cgd *
1793 1.1 cgd * If not forwarding, just drop the packet. This could be confusing
1794 1.1 cgd * if ipforwarding was zero but some routing protocol was advancing
1795 1.1 cgd * us as a gateway to somewhere. However, we must let the routing
1796 1.1 cgd * protocol deal with that.
1797 1.1 cgd *
1798 1.1 cgd * The srcrt parameter indicates whether the packet is being forwarded
1799 1.1 cgd * via a source route.
 *
 * m is the packet to forward; its data is read as a struct ip.  This
 * function does not touch m again after it has been freed with m_freem()
 * or passed to icmp_error() or ip_output().  Nothing is returned; the
 * outcome is reflected in the ipstat counters and in any ICMP message
 * generated from the saved copy.
1800 1.1 cgd */
1801 1.13 mycroft void
1802 1.211 perry ip_forward(struct mbuf *m, int srcrt)
1803 1.1 cgd {
1804 1.109 augustss struct ip *ip = mtod(m, struct ip *);
1805 1.109 augustss struct rtentry *rt;
1806 1.220 christos int error, type = 0, code = 0, destmtu = 0;
1807 1.1 cgd struct mbuf *mcopy;
1808 1.13 mycroft n_long dest;
1809 1.249 dyoung union {
1810 1.249 dyoung struct sockaddr dst;
1811 1.249 dyoung struct sockaddr_in dst4;
1812 1.249 dyoung } u;
1813 1.164 matt
1814 1.164 matt /*
1815 1.164 matt * We are now in the output path.
1816 1.164 matt */
1817 1.164 matt MCLAIM(m, &ip_tx_mowner);
1818 1.135 thorpej
1819 1.135 thorpej /*
1820 1.135 thorpej * Clear any in-bound checksum flags for this packet.
1821 1.135 thorpej */
1822 1.135 thorpej m->m_pkthdr.csum_flags = 0;
1823 1.1 cgd
/* dest stays 0 unless a redirect is selected below; it is handed to icmp_error(). */
1824 1.13 mycroft dest = 0;
1825 1.1 cgd #ifdef DIAGNOSTIC
1826 1.224 joerg if (ipprintfs) {
1827 1.224 joerg printf("forward: src %s ", inet_ntoa(ip->ip_src));
1828 1.224 joerg printf("dst %s ttl %x\n", inet_ntoa(ip->ip_dst), ip->ip_ttl);
1829 1.224 joerg }
1830 1.1 cgd #endif
/*
 * Packets flagged M_BCAST/M_MCAST, or whose destination in_canforward()
 * rejects, are counted and freed without generating any ICMP message.
 */
1831 1.93 sommerfe if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1832 1.1 cgd ipstat.ips_cantforward++;
1833 1.1 cgd m_freem(m);
1834 1.1 cgd return;
1835 1.1 cgd }
/* The TTL would not survive the IPTTLDEC decrement: report time exceeded. */
1836 1.1 cgd if (ip->ip_ttl <= IPTTLDEC) {
1837 1.13 mycroft icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1838 1.1 cgd return;
1839 1.1 cgd }
1840 1.1 cgd
/* Look the destination up via the ipforward_rt cache; no route means net unreachable. */
1841 1.249 dyoung sockaddr_in_init(&u.dst4, &ip->ip_dst, 0);
1842 1.249 dyoung if ((rt = rtcache_lookup(&ipforward_rt, &u.dst)) == NULL) {
1843 1.249 dyoung icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_NET, dest, 0);
1844 1.249 dyoung return;
1845 1.1 cgd }
1846 1.1 cgd
1847 1.1 cgd /*
1848 1.34 mycroft * Save at most 68 bytes of the packet in case
1849 1.1 cgd * we need to generate an ICMP message to the src.
1850 1.119 itojun * Pullup to avoid sharing mbuf cluster between m and mcopy.
1851 1.1 cgd */
1852 1.155 itojun mcopy = m_copym(m, 0, imin(ntohs(ip->ip_len), 68), M_DONTWAIT);
1853 1.119 itojun if (mcopy)
1854 1.119 itojun mcopy = m_pullup(mcopy, ip->ip_hl << 2);
1855 1.1 cgd
/* The TTL is decremented only after the copy for ICMP has been taken. */
1856 1.221 christos ip->ip_ttl -= IPTTLDEC;
1857 1.221 christos
1858 1.1 cgd /*
1859 1.1 cgd * If forwarding packet using same interface that it came in on,
1860 1.1 cgd * perhaps should send a redirect to sender to shortcut a hop.
1861 1.1 cgd * Only send redirect if source is sending directly to us,
1862 1.1 cgd * and if packet was not source routed (or has any options).
1863 1.1 cgd * Also, don't send redirect if forwarding using a default route
1864 1.1 cgd * or a route modified by a redirect.
1865 1.1 cgd */
1866 1.1 cgd if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1867 1.1 cgd (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1868 1.250 dyoung !in_nullhost(satocsin(rt_getkey(rt))->sin_addr) &&
1869 1.1 cgd ipsendredirects && !srcrt) {
1870 1.19 mycroft if (rt->rt_ifa &&
1871 1.19 mycroft (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1872 1.19 mycroft ifatoia(rt->rt_ifa)->ia_subnet) {
1873 1.77 thorpej if (rt->rt_flags & RTF_GATEWAY)
1874 1.77 thorpej dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1875 1.77 thorpej else
1876 1.77 thorpej dest = ip->ip_dst.s_addr;
1877 1.77 thorpej /*
1878 1.77 thorpej * Router requirements says to only send host
1879 1.77 thorpej * redirects.
1880 1.77 thorpej */
1881 1.77 thorpej type = ICMP_REDIRECT;
1882 1.77 thorpej code = ICMP_REDIRECT_HOST;
1883 1.1 cgd #ifdef DIAGNOSTIC
1884 1.77 thorpej if (ipprintfs)
1885 1.77 thorpej printf("redirect (%d) to %x\n", code,
1886 1.77 thorpej (u_int32_t)dest);
1887 1.1 cgd #endif
1888 1.1 cgd }
1889 1.1 cgd }
1890 1.1 cgd
1891 1.238 dyoung error = ip_output(m, NULL, &ipforward_rt,
1892 1.173 jonathan (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
1893 1.174 itojun (struct ip_moptions *)NULL, (struct socket *)NULL);
1894 1.173 jonathan
/*
 * A failure falls through to the errno-to-ICMP mapping below.  On success
 * with no redirect pending, the saved copy is released (GATEWAY kernels
 * may first pass it to ipflow_create()) and the function returns.
 */
1895 1.1 cgd if (error)
1896 1.1 cgd ipstat.ips_cantforward++;
1897 1.1 cgd else {
1898 1.1 cgd ipstat.ips_forward++;
1899 1.1 cgd if (type)
1900 1.1 cgd ipstat.ips_redirectsent++;
1901 1.1 cgd else {
1902 1.63 matt if (mcopy) {
1903 1.63 matt #ifdef GATEWAY
1904 1.64 thorpej if (mcopy->m_flags & M_CANFASTFWD)
1905 1.64 thorpej ipflow_create(&ipforward_rt, mcopy);
1906 1.63 matt #endif
1907 1.1 cgd m_freem(mcopy);
1908 1.63 matt }
1909 1.1 cgd return;
1910 1.1 cgd }
1911 1.1 cgd }
/* Without a saved copy there is nothing to hand to icmp_error(). */
1912 1.1 cgd if (mcopy == NULL)
1913 1.1 cgd return;
1914 1.13 mycroft
1915 1.1 cgd switch (error) {
1916 1.1 cgd
1917 1.1 cgd case 0: /* forwarded, but need redirect */
1918 1.1 cgd /* type, code set above */
1919 1.1 cgd break;
1920 1.1 cgd
/* Any errno without its own case below is reported as host unreachable. */
1921 1.1 cgd case ENETUNREACH: /* shouldn't happen, checked above */
1922 1.1 cgd case EHOSTUNREACH:
1923 1.1 cgd case ENETDOWN:
1924 1.1 cgd case EHOSTDOWN:
1925 1.1 cgd default:
1926 1.1 cgd type = ICMP_UNREACH;
1927 1.1 cgd code = ICMP_UNREACH_HOST;
1928 1.1 cgd break;
1929 1.1 cgd
1930 1.1 cgd case EMSGSIZE:
1931 1.1 cgd type = ICMP_UNREACH;
1932 1.1 cgd code = ICMP_UNREACH_NEEDFRAG;
/* destmtu, passed to icmp_error() below, is filled in from the route when one is cached. */
1933 1.173 jonathan #if !defined(IPSEC) && !defined(FAST_IPSEC)
1934 1.238 dyoung if (ipforward_rt.ro_rt != NULL)
1935 1.220 christos destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
1936 1.89 itojun #else
1937 1.89 itojun /*
1938 1.89 itojun * If the packet is routed over IPsec tunnel, tell the
1939 1.89 itojun * originator the tunnel MTU.
1940 1.89 itojun * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1941 1.89 itojun * XXX quickhack!!!
1942 1.89 itojun */
1943 1.238 dyoung if (ipforward_rt.ro_rt != NULL) {
1944 1.89 itojun struct secpolicy *sp;
1945 1.89 itojun int ipsecerror;
1946 1.95 itojun size_t ipsechdr;
1947 1.89 itojun struct route *ro;
1948 1.89 itojun
1949 1.89 itojun sp = ipsec4_getpolicybyaddr(mcopy,
1950 1.170 itojun IPSEC_DIR_OUTBOUND, IP_FORWARDING,
1951 1.170 itojun &ipsecerror);
1952 1.89 itojun
1953 1.89 itojun if (sp == NULL)
1954 1.220 christos destmtu = ipforward_rt.ro_rt->rt_ifp->if_mtu;
1955 1.89 itojun else {
1956 1.89 itojun /* count IPsec header size */
1957 1.95 itojun ipsechdr = ipsec4_hdrsiz(mcopy,
1958 1.170 itojun IPSEC_DIR_OUTBOUND, NULL);
1959 1.89 itojun
1960 1.89 itojun /*
1961 1.89 itojun * find the correct route for outer IPv4
1962 1.89 itojun * header, compute tunnel MTU.
1963 1.89 itojun */
1964 1.220 christos
1965 1.89 itojun if (sp->req != NULL
1966 1.95 itojun && sp->req->sav != NULL
1967 1.95 itojun && sp->req->sav->sah != NULL) {
1968 1.95 itojun ro = &sp->req->sav->sah->sa_route;
1969 1.89 itojun if (ro->ro_rt && ro->ro_rt->rt_ifp) {
/* Prefer the route's rmx_mtu metric when it is non-zero, else the interface MTU. */
1970 1.220 christos destmtu =
1971 1.151 itojun ro->ro_rt->rt_rmx.rmx_mtu ?
1972 1.151 itojun ro->ro_rt->rt_rmx.rmx_mtu :
1973 1.89 itojun ro->ro_rt->rt_ifp->if_mtu;
1974 1.220 christos destmtu -= ipsechdr;
1975 1.89 itojun }
1976 1.89 itojun }
1977 1.89 itojun
1978 1.173 jonathan #ifdef IPSEC
1979 1.89 itojun key_freesp(sp);
1980 1.173 jonathan #else
1981 1.173 jonathan KEY_FREESP(&sp);
1982 1.173 jonathan #endif
1983 1.89 itojun }
1984 1.89 itojun }
1985 1.89 itojun #endif /*IPSEC*/
1986 1.1 cgd ipstat.ips_cantfrag++;
1987 1.1 cgd break;
1988 1.1 cgd
1989 1.1 cgd case ENOBUFS:
/* With "#if 1" the active branch frees the copy and sends nothing. */
1990 1.143 itojun #if 1
1991 1.143 itojun /*
1992 1.143 itojun * a router should not generate ICMP_SOURCEQUENCH as
1993 1.143 itojun * required in RFC1812 Requirements for IP Version 4 Routers.
1994 1.143 itojun * source quench could be a big problem under DoS attacks,
1995 1.149 wiz * or if the underlying interface is rate-limited.
1996 1.143 itojun */
1997 1.143 itojun if (mcopy)
1998 1.143 itojun m_freem(mcopy);
1999 1.143 itojun return;
2000 1.143 itojun #else
2001 1.1 cgd type = ICMP_SOURCEQUENCH;
2002 1.1 cgd code = 0;
2003 1.1 cgd break;
2004 1.143 itojun #endif
2005 1.1 cgd }
2006 1.220 christos icmp_error(mcopy, type, code, dest, destmtu);
2007 1.44 thorpej }
2008 1.44 thorpej
2009 1.44 thorpej void
2010 1.211 perry ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
2011 1.211 perry struct mbuf *m)
2012 1.44 thorpej {
2013 1.44 thorpej
2014 1.44 thorpej if (inp->inp_socket->so_options & SO_TIMESTAMP) {
2015 1.44 thorpej struct timeval tv;
2016 1.44 thorpej
2017 1.44 thorpej microtime(&tv);
2018 1.244 christos *mp = sbcreatecontrol((void *) &tv, sizeof(tv),
2019 1.44 thorpej SCM_TIMESTAMP, SOL_SOCKET);
2020 1.44 thorpej if (*mp)
2021 1.44 thorpej mp = &(*mp)->m_next;
2022 1.44 thorpej }
2023 1.44 thorpej if (inp->inp_flags & INP_RECVDSTADDR) {
2024 1.244 christos *mp = sbcreatecontrol((void *) &ip->ip_dst,
2025 1.44 thorpej sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
2026 1.44 thorpej if (*mp)
2027 1.44 thorpej mp = &(*mp)->m_next;
2028 1.44 thorpej }
2029 1.44 thorpej #ifdef notyet
2030 1.44 thorpej /*
2031 1.44 thorpej * XXX
2032 1.44 thorpej * Moving these out of udp_input() made them even more broken
2033 1.44 thorpej * than they already were.
2034 1.44 thorpej * - fenner (at) parc.xerox.com
2035 1.44 thorpej */
2036 1.44 thorpej /* options were tossed already */
2037 1.44 thorpej if (inp->inp_flags & INP_RECVOPTS) {
2038 1.244 christos *mp = sbcreatecontrol((void *) opts_deleted_above,
2039 1.44 thorpej sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
2040 1.44 thorpej if (*mp)
2041 1.44 thorpej mp = &(*mp)->m_next;
2042 1.44 thorpej }
2043 1.44 thorpej /* ip_srcroute doesn't do what we want here, need to fix */
2044 1.44 thorpej if (inp->inp_flags & INP_RECVRETOPTS) {
2045 1.244 christos *mp = sbcreatecontrol((void *) ip_srcroute(),
2046 1.44 thorpej sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
2047 1.44 thorpej if (*mp)
2048 1.44 thorpej mp = &(*mp)->m_next;
2049 1.44 thorpej }
2050 1.44 thorpej #endif
2051 1.44 thorpej if (inp->inp_flags & INP_RECVIF) {
2052 1.44 thorpej struct sockaddr_dl sdl;
2053 1.44 thorpej
2054 1.252 dyoung sockaddr_dl_init(&sdl, sizeof(sdl),
2055 1.252 dyoung (m->m_pkthdr.rcvif != NULL)
2056 1.252 dyoung ? m->m_pkthdr.rcvif->if_index
2057 1.252 dyoung : 0,
2058 1.252 dyoung 0, NULL, 0, NULL, 0);
2059 1.251 dyoung *mp = sbcreatecontrol(&sdl, sdl.sdl_len, IP_RECVIF, IPPROTO_IP);
2060 1.44 thorpej if (*mp)
2061 1.44 thorpej mp = &(*mp)->m_next;
2062 1.44 thorpej }
2063 1.13 mycroft }
2064 1.13 mycroft
2065 1.189 atatat /*
2066 1.228 elad * sysctl helper routine for net.inet.ip.forwsrcrt.
2067 1.228 elad */
2068 1.228 elad static int
2069 1.228 elad sysctl_net_inet_ip_forwsrcrt(SYSCTLFN_ARGS)
2070 1.228 elad {
2071 1.228 elad int error, tmp;
2072 1.228 elad struct sysctlnode node;
2073 1.228 elad
2074 1.228 elad node = *rnode;
2075 1.228 elad tmp = ip_forwsrcrt;
2076 1.228 elad node.sysctl_data = &tmp;
2077 1.228 elad error = sysctl_lookup(SYSCTLFN_CALL(&node));
2078 1.228 elad if (error || newp == NULL)
2079 1.228 elad return (error);
2080 1.228 elad
2081 1.230 elad if (kauth_authorize_network(l->l_cred, KAUTH_NETWORK_FORWSRCRT,
2082 1.232 elad 0, NULL, NULL, NULL))
2083 1.228 elad return (EPERM);
2084 1.228 elad
2085 1.228 elad ip_forwsrcrt = tmp;
2086 1.228 elad
2087 1.228 elad return (0);
2088 1.228 elad }
2089 1.228 elad
2090 1.228 elad /*
2091 1.189 atatat * sysctl helper routine for net.inet.ip.mtudisctimeout. checks the
2092 1.189 atatat * range of the new value and tweaks timers if it changes.
2093 1.189 atatat */
2094 1.189 atatat static int
2095 1.189 atatat sysctl_net_inet_ip_pmtudto(SYSCTLFN_ARGS)
2096 1.13 mycroft {
2097 1.189 atatat int error, tmp;
2098 1.189 atatat struct sysctlnode node;
2099 1.189 atatat
2100 1.189 atatat node = *rnode;
2101 1.189 atatat tmp = ip_mtudisc_timeout;
2102 1.189 atatat node.sysctl_data = &tmp;
2103 1.189 atatat error = sysctl_lookup(SYSCTLFN_CALL(&node));
2104 1.189 atatat if (error || newp == NULL)
2105 1.189 atatat return (error);
2106 1.189 atatat if (tmp < 0)
2107 1.189 atatat return (EINVAL);
2108 1.52 thorpej
2109 1.189 atatat ip_mtudisc_timeout = tmp;
2110 1.189 atatat rt_timer_queue_change(ip_mtudisc_timeout_q, ip_mtudisc_timeout);
2111 1.189 atatat
2112 1.189 atatat return (0);
2113 1.189 atatat }
2114 1.54 lukem
2115 1.65 matt #ifdef GATEWAY
2116 1.189 atatat /*
2117 1.247 liamjfoy * sysctl helper routine for net.inet.ip.maxflows.
2118 1.189 atatat */
2119 1.189 atatat static int
2120 1.189 atatat sysctl_net_inet_ip_maxflows(SYSCTLFN_ARGS)
2121 1.189 atatat {
2122 1.189 atatat int s;
2123 1.67 thorpej
2124 1.217 atatat s = sysctl_lookup(SYSCTLFN_CALL(rnode));
2125 1.247 liamjfoy if (s || newp == NULL)
2126 1.189 atatat return (s);
2127 1.212 perry
2128 1.189 atatat s = splsoftnet();
2129 1.189 atatat ipflow_reap(0);
2130 1.189 atatat splx(s);
2131 1.144 martin
2132 1.189 atatat return (0);
2133 1.189 atatat }
2134 1.248 liamjfoy
2135 1.248 liamjfoy static int
2136 1.248 liamjfoy sysctl_net_inet_ip_hashsize(SYSCTLFN_ARGS)
2137 1.248 liamjfoy {
2138 1.248 liamjfoy int error, tmp;
2139 1.248 liamjfoy struct sysctlnode node;
2140 1.248 liamjfoy
2141 1.248 liamjfoy node = *rnode;
2142 1.248 liamjfoy tmp = ip_hashsize;
2143 1.248 liamjfoy node.sysctl_data = &tmp;
2144 1.248 liamjfoy error = sysctl_lookup(SYSCTLFN_CALL(&node));
2145 1.248 liamjfoy if (error || newp == NULL)
2146 1.248 liamjfoy return (error);
2147 1.248 liamjfoy
2148 1.248 liamjfoy if ((tmp & (tmp - 1)) == 0 && tmp != 0) {
2149 1.248 liamjfoy /*
2150 1.248 liamjfoy * Can only fail due to malloc()
2151 1.248 liamjfoy */
2152 1.248 liamjfoy if (ipflow_invalidate_all(tmp))
2153 1.248 liamjfoy return ENOMEM;
2154 1.248 liamjfoy } else {
2155 1.248 liamjfoy /*
2156 1.248 liamjfoy * EINVAL if not a power of 2
2157 1.248 liamjfoy */
2158 1.248 liamjfoy return EINVAL;
2159 1.248 liamjfoy }
2160 1.248 liamjfoy
2161 1.248 liamjfoy return (0);
2162 1.248 liamjfoy }
2163 1.189 atatat #endif /* GATEWAY */
2164 1.117 tron
2165 1.131 itojun
2166 1.189 atatat SYSCTL_SETUP(sysctl_net_inet_ip_setup, "sysctl net.inet.ip subtree setup")
2167 1.189 atatat {
2168 1.189 atatat extern int subnetsarelocal, hostzeroisbroadcast;
2169 1.180 jonathan
2170 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2171 1.197 atatat CTLFLAG_PERMANENT,
2172 1.189 atatat CTLTYPE_NODE, "net", NULL,
2173 1.189 atatat NULL, 0, NULL, 0,
2174 1.189 atatat CTL_NET, CTL_EOL);
2175 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2176 1.197 atatat CTLFLAG_PERMANENT,
2177 1.203 atatat CTLTYPE_NODE, "inet",
2178 1.203 atatat SYSCTL_DESCR("PF_INET related settings"),
2179 1.189 atatat NULL, 0, NULL, 0,
2180 1.189 atatat CTL_NET, PF_INET, CTL_EOL);
2181 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2182 1.197 atatat CTLFLAG_PERMANENT,
2183 1.203 atatat CTLTYPE_NODE, "ip",
2184 1.203 atatat SYSCTL_DESCR("IPv4 related settings"),
2185 1.189 atatat NULL, 0, NULL, 0,
2186 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL);
2187 1.212 perry
2188 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2189 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2190 1.203 atatat CTLTYPE_INT, "forwarding",
2191 1.203 atatat SYSCTL_DESCR("Enable forwarding of INET datagrams"),
2192 1.189 atatat NULL, 0, &ipforwarding, 0,
2193 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2194 1.189 atatat IPCTL_FORWARDING, CTL_EOL);
2195 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2196 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2197 1.203 atatat CTLTYPE_INT, "redirect",
2198 1.203 atatat SYSCTL_DESCR("Enable sending of ICMP redirect messages"),
2199 1.189 atatat NULL, 0, &ipsendredirects, 0,
2200 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2201 1.189 atatat IPCTL_SENDREDIRECTS, CTL_EOL);
2202 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2203 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2204 1.203 atatat CTLTYPE_INT, "ttl",
2205 1.203 atatat SYSCTL_DESCR("Default TTL for an INET datagram"),
2206 1.189 atatat NULL, 0, &ip_defttl, 0,
2207 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2208 1.189 atatat IPCTL_DEFTTL, CTL_EOL);
2209 1.189 atatat #ifdef IPCTL_DEFMTU
2210 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2211 1.197 atatat CTLFLAG_PERMANENT /* |CTLFLAG_READWRITE? */,
2212 1.203 atatat CTLTYPE_INT, "mtu",
2213 1.203 atatat SYSCTL_DESCR("Default MTA for an INET route"),
2214 1.189 atatat NULL, 0, &ip_mtu, 0,
2215 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2216 1.189 atatat IPCTL_DEFMTU, CTL_EOL);
2217 1.189 atatat #endif /* IPCTL_DEFMTU */
2218 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2219 1.228 elad CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2220 1.203 atatat CTLTYPE_INT, "forwsrcrt",
2221 1.203 atatat SYSCTL_DESCR("Enable forwarding of source-routed "
2222 1.203 atatat "datagrams"),
2223 1.228 elad sysctl_net_inet_ip_forwsrcrt, 0, &ip_forwsrcrt, 0,
2224 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2225 1.189 atatat IPCTL_FORWSRCRT, CTL_EOL);
2226 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2227 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2228 1.203 atatat CTLTYPE_INT, "directed-broadcast",
2229 1.203 atatat SYSCTL_DESCR("Enable forwarding of broadcast datagrams"),
2230 1.189 atatat NULL, 0, &ip_directedbcast, 0,
2231 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2232 1.189 atatat IPCTL_DIRECTEDBCAST, CTL_EOL);
2233 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2234 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2235 1.203 atatat CTLTYPE_INT, "allowsrcrt",
2236 1.203 atatat SYSCTL_DESCR("Accept source-routed datagrams"),
2237 1.189 atatat NULL, 0, &ip_allowsrcrt, 0,
2238 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2239 1.189 atatat IPCTL_ALLOWSRCRT, CTL_EOL);
2240 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2241 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2242 1.203 atatat CTLTYPE_INT, "subnetsarelocal",
2243 1.203 atatat SYSCTL_DESCR("Whether logical subnets are considered "
2244 1.203 atatat "local"),
2245 1.189 atatat NULL, 0, &subnetsarelocal, 0,
2246 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2247 1.189 atatat IPCTL_SUBNETSARELOCAL, CTL_EOL);
2248 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2249 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2250 1.203 atatat CTLTYPE_INT, "mtudisc",
2251 1.203 atatat SYSCTL_DESCR("Use RFC1191 Path MTU Discovery"),
2252 1.189 atatat NULL, 0, &ip_mtudisc, 0,
2253 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2254 1.189 atatat IPCTL_MTUDISC, CTL_EOL);
2255 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2256 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2257 1.203 atatat CTLTYPE_INT, "anonportmin",
2258 1.203 atatat SYSCTL_DESCR("Lowest ephemeral port number to assign"),
2259 1.189 atatat sysctl_net_inet_ip_ports, 0, &anonportmin, 0,
2260 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2261 1.189 atatat IPCTL_ANONPORTMIN, CTL_EOL);
2262 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2263 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2264 1.203 atatat CTLTYPE_INT, "anonportmax",
2265 1.203 atatat SYSCTL_DESCR("Highest ephemeral port number to assign"),
2266 1.189 atatat sysctl_net_inet_ip_ports, 0, &anonportmax, 0,
2267 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2268 1.189 atatat IPCTL_ANONPORTMAX, CTL_EOL);
2269 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2270 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2271 1.203 atatat CTLTYPE_INT, "mtudisctimeout",
2272 1.203 atatat SYSCTL_DESCR("Lifetime of a Path MTU Discovered route"),
2273 1.189 atatat sysctl_net_inet_ip_pmtudto, 0, &ip_mtudisc_timeout, 0,
2274 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2275 1.189 atatat IPCTL_MTUDISCTIMEOUT, CTL_EOL);
2276 1.189 atatat #ifdef GATEWAY
2277 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2278 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2279 1.203 atatat CTLTYPE_INT, "maxflows",
2280 1.203 atatat SYSCTL_DESCR("Number of flows for fast forwarding"),
2281 1.189 atatat sysctl_net_inet_ip_maxflows, 0, &ip_maxflows, 0,
2282 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2283 1.189 atatat IPCTL_MAXFLOWS, CTL_EOL);
2284 1.248 liamjfoy sysctl_createv(clog, 0, NULL, NULL,
2285 1.248 liamjfoy CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2286 1.248 liamjfoy CTLTYPE_INT, "hashsize",
2287 1.248 liamjfoy SYSCTL_DESCR("Size of hash table for fast forwarding (IPv4)"),
2288 1.248 liamjfoy sysctl_net_inet_ip_hashsize, 0, &ip_hashsize, 0,
2289 1.248 liamjfoy CTL_NET, PF_INET, IPPROTO_IP,
2290 1.248 liamjfoy CTL_CREATE, CTL_EOL);
2291 1.189 atatat #endif /* GATEWAY */
2292 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2293 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2294 1.203 atatat CTLTYPE_INT, "hostzerobroadcast",
2295 1.203 atatat SYSCTL_DESCR("All zeroes address is broadcast address"),
2296 1.189 atatat NULL, 0, &hostzeroisbroadcast, 0,
2297 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2298 1.189 atatat IPCTL_HOSTZEROBROADCAST, CTL_EOL);
2299 1.189 atatat #if NGIF > 0
2300 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2301 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2302 1.203 atatat CTLTYPE_INT, "gifttl",
2303 1.203 atatat SYSCTL_DESCR("Default TTL for a gif tunnel datagram"),
2304 1.189 atatat NULL, 0, &ip_gif_ttl, 0,
2305 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2306 1.189 atatat IPCTL_GIF_TTL, CTL_EOL);
2307 1.189 atatat #endif /* NGIF */
2308 1.189 atatat #ifndef IPNOPRIVPORTS
2309 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2310 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2311 1.203 atatat CTLTYPE_INT, "lowportmin",
2312 1.203 atatat SYSCTL_DESCR("Lowest privileged ephemeral port number "
2313 1.203 atatat "to assign"),
2314 1.189 atatat sysctl_net_inet_ip_ports, 0, &lowportmin, 0,
2315 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2316 1.189 atatat IPCTL_LOWPORTMIN, CTL_EOL);
2317 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2318 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2319 1.203 atatat CTLTYPE_INT, "lowportmax",
2320 1.203 atatat SYSCTL_DESCR("Highest privileged ephemeral port number "
2321 1.203 atatat "to assign"),
2322 1.189 atatat sysctl_net_inet_ip_ports, 0, &lowportmax, 0,
2323 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2324 1.189 atatat IPCTL_LOWPORTMAX, CTL_EOL);
2325 1.189 atatat #endif /* IPNOPRIVPORTS */
2326 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2327 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2328 1.203 atatat CTLTYPE_INT, "maxfragpackets",
2329 1.203 atatat SYSCTL_DESCR("Maximum number of fragments to retain for "
2330 1.203 atatat "possible reassembly"),
2331 1.189 atatat NULL, 0, &ip_maxfragpackets, 0,
2332 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2333 1.189 atatat IPCTL_MAXFRAGPACKETS, CTL_EOL);
2334 1.189 atatat #if NGRE > 0
2335 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2336 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2337 1.203 atatat CTLTYPE_INT, "grettl",
2338 1.203 atatat SYSCTL_DESCR("Default TTL for a gre tunnel datagram"),
2339 1.189 atatat NULL, 0, &ip_gre_ttl, 0,
2340 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2341 1.189 atatat IPCTL_GRE_TTL, CTL_EOL);
2342 1.189 atatat #endif /* NGRE */
2343 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2344 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2345 1.203 atatat CTLTYPE_INT, "checkinterface",
2346 1.203 atatat SYSCTL_DESCR("Enable receive side of Strong ES model "
2347 1.203 atatat "from RFC1122"),
2348 1.189 atatat NULL, 0, &ip_checkinterface, 0,
2349 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2350 1.189 atatat IPCTL_CHECKINTERFACE, CTL_EOL);
2351 1.197 atatat sysctl_createv(clog, 0, NULL, NULL,
2352 1.197 atatat CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2353 1.203 atatat CTLTYPE_INT, "random_id",
2354 1.203 atatat SYSCTL_DESCR("Assign random ip_id values"),
2355 1.189 atatat NULL, 0, &ip_do_randomid, 0,
2356 1.189 atatat CTL_NET, PF_INET, IPPROTO_IP,
2357 1.189 atatat IPCTL_RANDOMID, CTL_EOL);
2358 1.206 thorpej sysctl_createv(clog, 0, NULL, NULL,
2359 1.206 thorpej CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
2360 1.206 thorpej CTLTYPE_INT, "do_loopback_cksum",
2361 1.206 thorpej SYSCTL_DESCR("Perform IP checksum on loopback"),
2362 1.206 thorpej NULL, 0, &ip_do_loopback_cksum, 0,
2363 1.206 thorpej CTL_NET, PF_INET, IPPROTO_IP,
2364 1.206 thorpej IPCTL_LOOPBACKCKSUM, CTL_EOL);
2365 1.219 elad sysctl_createv(clog, 0, NULL, NULL,
2366 1.219 elad CTLFLAG_PERMANENT,
2367 1.219 elad CTLTYPE_STRUCT, "stats",
2368 1.219 elad SYSCTL_DESCR("IP statistics"),
2369 1.219 elad NULL, 0, &ipstat, sizeof(ipstat),
2370 1.219 elad CTL_NET, PF_INET, IPPROTO_IP, IPCTL_STATS,
2371 1.219 elad CTL_EOL);
2372 1.1 cgd }
2373