ip_input.c revision 1.183 1 1.183 jonathan /* $NetBSD: ip_input.c,v 1.183 2003/11/17 22:34:16 jonathan Exp $ */
2 1.89 itojun
3 1.89 itojun /*
4 1.89 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 1.89 itojun * All rights reserved.
6 1.152 itojun *
7 1.89 itojun * Redistribution and use in source and binary forms, with or without
8 1.89 itojun * modification, are permitted provided that the following conditions
9 1.89 itojun * are met:
10 1.89 itojun * 1. Redistributions of source code must retain the above copyright
11 1.89 itojun * notice, this list of conditions and the following disclaimer.
12 1.89 itojun * 2. Redistributions in binary form must reproduce the above copyright
13 1.89 itojun * notice, this list of conditions and the following disclaimer in the
14 1.89 itojun * documentation and/or other materials provided with the distribution.
15 1.89 itojun * 3. Neither the name of the project nor the names of its contributors
16 1.89 itojun * may be used to endorse or promote products derived from this software
17 1.89 itojun * without specific prior written permission.
18 1.152 itojun *
19 1.89 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 1.89 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.89 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.89 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 1.89 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.89 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.89 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.89 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.89 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.89 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.89 itojun * SUCH DAMAGE.
30 1.89 itojun */
31 1.76 thorpej
32 1.76 thorpej /*-
33 1.76 thorpej * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 1.76 thorpej * All rights reserved.
35 1.76 thorpej *
36 1.76 thorpej * This code is derived from software contributed to The NetBSD Foundation
37 1.76 thorpej * by Public Access Networks Corporation ("Panix"). It was developed under
38 1.76 thorpej * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 1.76 thorpej *
40 1.76 thorpej * Redistribution and use in source and binary forms, with or without
41 1.76 thorpej * modification, are permitted provided that the following conditions
42 1.76 thorpej * are met:
43 1.76 thorpej * 1. Redistributions of source code must retain the above copyright
44 1.76 thorpej * notice, this list of conditions and the following disclaimer.
45 1.76 thorpej * 2. Redistributions in binary form must reproduce the above copyright
46 1.76 thorpej * notice, this list of conditions and the following disclaimer in the
47 1.76 thorpej * documentation and/or other materials provided with the distribution.
48 1.76 thorpej * 3. All advertising materials mentioning features or use of this software
49 1.76 thorpej * must display the following acknowledgement:
50 1.76 thorpej * This product includes software developed by the NetBSD
51 1.76 thorpej * Foundation, Inc. and its contributors.
52 1.76 thorpej * 4. Neither the name of The NetBSD Foundation nor the names of its
53 1.76 thorpej * contributors may be used to endorse or promote products derived
54 1.76 thorpej * from this software without specific prior written permission.
55 1.76 thorpej *
56 1.76 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 1.76 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 1.76 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 1.76 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 1.76 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 1.76 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 1.76 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 1.76 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 1.76 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 1.76 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 1.76 thorpej * POSSIBILITY OF SUCH DAMAGE.
67 1.76 thorpej */
68 1.14 cgd
69 1.1 cgd /*
70 1.13 mycroft * Copyright (c) 1982, 1986, 1988, 1993
71 1.13 mycroft * The Regents of the University of California. All rights reserved.
72 1.1 cgd *
73 1.1 cgd * Redistribution and use in source and binary forms, with or without
74 1.1 cgd * modification, are permitted provided that the following conditions
75 1.1 cgd * are met:
76 1.1 cgd * 1. Redistributions of source code must retain the above copyright
77 1.1 cgd * notice, this list of conditions and the following disclaimer.
78 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
79 1.1 cgd * notice, this list of conditions and the following disclaimer in the
80 1.1 cgd * documentation and/or other materials provided with the distribution.
81 1.172 agc * 3. Neither the name of the University nor the names of its contributors
82 1.1 cgd * may be used to endorse or promote products derived from this software
83 1.1 cgd * without specific prior written permission.
84 1.1 cgd *
85 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
86 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
87 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
88 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
89 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
90 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
91 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
92 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
93 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
94 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
95 1.1 cgd * SUCH DAMAGE.
96 1.1 cgd *
97 1.14 cgd * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
98 1.1 cgd */
99 1.141 lukem
100 1.141 lukem #include <sys/cdefs.h>
101 1.183 jonathan __KERNEL_RCSID(0, "$NetBSD: ip_input.c,v 1.183 2003/11/17 22:34:16 jonathan Exp $");
102 1.55 scottr
103 1.62 matt #include "opt_gateway.h"
104 1.69 mrg #include "opt_pfil_hooks.h"
105 1.91 thorpej #include "opt_ipsec.h"
106 1.55 scottr #include "opt_mrouting.h"
107 1.167 martin #include "opt_mbuftrace.h"
108 1.135 thorpej #include "opt_inet_csum.h"
109 1.1 cgd
110 1.5 mycroft #include <sys/param.h>
111 1.5 mycroft #include <sys/systm.h>
112 1.5 mycroft #include <sys/malloc.h>
113 1.5 mycroft #include <sys/mbuf.h>
114 1.5 mycroft #include <sys/domain.h>
115 1.5 mycroft #include <sys/protosw.h>
116 1.5 mycroft #include <sys/socket.h>
117 1.44 thorpej #include <sys/socketvar.h>
118 1.5 mycroft #include <sys/errno.h>
119 1.5 mycroft #include <sys/time.h>
120 1.5 mycroft #include <sys/kernel.h>
121 1.72 thorpej #include <sys/pool.h>
122 1.28 christos #include <sys/sysctl.h>
123 1.1 cgd
124 1.5 mycroft #include <net/if.h>
125 1.44 thorpej #include <net/if_dl.h>
126 1.5 mycroft #include <net/route.h>
127 1.45 mrg #include <net/pfil.h>
128 1.1 cgd
129 1.5 mycroft #include <netinet/in.h>
130 1.5 mycroft #include <netinet/in_systm.h>
131 1.5 mycroft #include <netinet/ip.h>
132 1.5 mycroft #include <netinet/in_pcb.h>
133 1.5 mycroft #include <netinet/in_var.h>
134 1.5 mycroft #include <netinet/ip_var.h>
135 1.5 mycroft #include <netinet/ip_icmp.h>
136 1.89 itojun /* just for gif_ttl */
137 1.89 itojun #include <netinet/in_gif.h>
138 1.89 itojun #include "gif.h"
139 1.144 martin #include <net/if_gre.h>
140 1.144 martin #include "gre.h"
141 1.111 jdolecek
142 1.111 jdolecek #ifdef MROUTING
143 1.111 jdolecek #include <netinet/ip_mroute.h>
144 1.111 jdolecek #endif
145 1.89 itojun
146 1.89 itojun #ifdef IPSEC
147 1.89 itojun #include <netinet6/ipsec.h>
148 1.89 itojun #include <netkey/key.h>
149 1.89 itojun #endif
150 1.173 jonathan #ifdef FAST_IPSEC
151 1.173 jonathan #include <netipsec/ipsec.h>
152 1.173 jonathan #include <netipsec/key.h>
153 1.173 jonathan #endif /* FAST_IPSEC*/
154 1.44 thorpej
/*
 * Compile-time defaults for the IP tunables defined below.  Each one may
 * be overridden by defining the macro before this point (typically from
 * the kernel configuration); otherwise the value given here is used.
 */

/* Forward IP packets not for us?  On by default only for GATEWAY kernels. */
#if !defined(IPFORWARDING)
#if defined(GATEWAY)
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif
#endif

#if !defined(IPSENDREDIRECTS)
#define	IPSENDREDIRECTS	1
#endif

#if !defined(IPFORWSRCRT)
#define	IPFORWSRCRT	1	/* forward source-routed packets */
#endif

#if !defined(IPALLOWSRCRT)
#define	IPALLOWSRCRT	1	/* allow source-routed packets */
#endif

#if !defined(IPMTUDISC)
#define	IPMTUDISC	1
#endif

#if !defined(IPMTUDISCTIMEOUT)
#define	IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
#endif

/*
 * The older DIRECTED_BROADCAST option is still honoured, so that kernel
 * configurations written before IPDIRECTEDBCAST existed keep working
 * unchanged.
 */
#if !defined(IPDIRECTEDBCAST)
#if defined(DIRECTED_BROADCAST)
#define	IPDIRECTEDBCAST	1
#else
#define	IPDIRECTEDBCAST	0
#endif
#endif
189 1.1 cgd int ipforwarding = IPFORWARDING;
190 1.1 cgd int ipsendredirects = IPSENDREDIRECTS;
191 1.13 mycroft int ip_defttl = IPDEFTTL;
192 1.26 thorpej int ip_forwsrcrt = IPFORWSRCRT;
193 1.27 thorpej int ip_directedbcast = IPDIRECTEDBCAST;
194 1.47 cjs int ip_allowsrcrt = IPALLOWSRCRT;
195 1.53 kml int ip_mtudisc = IPMTUDISC;
196 1.156 itojun int ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
197 1.1 cgd #ifdef DIAGNOSTIC
198 1.1 cgd int ipprintfs = 0;
199 1.1 cgd #endif
200 1.165 christos /*
201 1.165 christos * XXX - Setting ip_checkinterface mostly implements the receive side of
202 1.165 christos * the Strong ES model described in RFC 1122, but since the routing table
203 1.165 christos * and transmit implementation do not implement the Strong ES model,
204 1.165 christos * setting this to 1 results in an odd hybrid.
205 1.165 christos *
206 1.165 christos * XXX - ip_checkinterface currently must be disabled if you use ipnat
207 1.165 christos * to translate the destination address to another local interface.
208 1.165 christos *
209 1.165 christos * XXX - ip_checkinterface must be disabled if you add IP aliases
210 1.165 christos * to the loopback interface instead of the interface where the
211 1.165 christos * packets for those addresses are received.
212 1.165 christos */
213 1.165 christos int ip_checkinterface = 0;
214 1.165 christos
215 1.1 cgd
216 1.60 kml struct rttimer_queue *ip_mtudisc_timeout_q = NULL;
217 1.60 kml
218 1.1 cgd extern struct domain inetdomain;
219 1.1 cgd int ipqmaxlen = IFQ_MAXLEN;
220 1.150 matt u_long in_ifaddrhash; /* size of hash table - 1 */
221 1.150 matt int in_ifaddrentries; /* total number of addrs */
222 1.181 jonathan struct in_ifaddrhead in_ifaddrhead;
223 1.57 tls struct in_ifaddrhashhead *in_ifaddrhashtbl;
224 1.166 matt u_long in_multihash; /* size of hash table - 1 */
225 1.166 matt int in_multientries; /* total number of addrs */
226 1.166 matt struct in_multihashhead *in_multihashtbl;
227 1.13 mycroft struct ifqueue ipintrq;
228 1.63 matt struct ipstat ipstat;
229 1.183 jonathan uint16_t ip_id;
230 1.75 thorpej
231 1.121 thorpej #ifdef PFIL_HOOKS
232 1.121 thorpej struct pfil_head inet_pfil_hook;
233 1.121 thorpej #endif
234 1.121 thorpej
235 1.63 matt struct ipqhead ipq;
236 1.75 thorpej int ipq_locked;
237 1.131 itojun int ip_nfragpackets = 0;
238 1.133 itojun int ip_maxfragpackets = 200;
239 1.75 thorpej
240 1.75 thorpej static __inline int ipq_lock_try __P((void));
241 1.75 thorpej static __inline void ipq_unlock __P((void));
242 1.75 thorpej
243 1.75 thorpej static __inline int
244 1.75 thorpej ipq_lock_try()
245 1.75 thorpej {
246 1.75 thorpej int s;
247 1.75 thorpej
248 1.132 thorpej /*
249 1.149 wiz * Use splvm() -- we're blocking things that would cause
250 1.132 thorpej * mbuf allocation.
251 1.132 thorpej */
252 1.132 thorpej s = splvm();
253 1.75 thorpej if (ipq_locked) {
254 1.75 thorpej splx(s);
255 1.75 thorpej return (0);
256 1.75 thorpej }
257 1.75 thorpej ipq_locked = 1;
258 1.75 thorpej splx(s);
259 1.75 thorpej return (1);
260 1.75 thorpej }
261 1.75 thorpej
262 1.75 thorpej static __inline void
263 1.75 thorpej ipq_unlock()
264 1.75 thorpej {
265 1.75 thorpej int s;
266 1.75 thorpej
267 1.132 thorpej s = splvm();
268 1.75 thorpej ipq_locked = 0;
269 1.75 thorpej splx(s);
270 1.75 thorpej }
271 1.75 thorpej
/*
 * Wrappers around the ipq lock primitives.
 *
 * Without DIAGNOSTIC, IPQ_LOCK() ignores the result of ipq_lock_try()
 * and IPQ_LOCK_CHECK() expands to nothing.  With DIAGNOSTIC, failing to
 * get the lock, or finding it not held when it should be, reports the
 * file and line and then panics.
 */
#ifndef DIAGNOSTIC

#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */

#else /* DIAGNOSTIC */

#define	IPQ_LOCK()							\
do {									\
	if (!ipq_lock_try()) {						\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (/*CONSTCOND*/ 0)

#define	IPQ_LOCK_CHECK()						\
do {									\
	if (!ipq_locked) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (/*CONSTCOND*/ 0)

#endif /* DIAGNOSTIC */

#define	IPQ_UNLOCK()		ipq_unlock()
293 1.1 cgd
294 1.166 matt struct pool inmulti_pool;
295 1.72 thorpej struct pool ipqent_pool;
296 1.72 thorpej
297 1.135 thorpej #ifdef INET_CSUM_COUNTERS
298 1.135 thorpej #include <sys/device.h>
299 1.135 thorpej
300 1.135 thorpej struct evcnt ip_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
301 1.135 thorpej NULL, "inet", "hwcsum bad");
302 1.135 thorpej struct evcnt ip_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
303 1.135 thorpej NULL, "inet", "hwcsum ok");
304 1.135 thorpej struct evcnt ip_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
305 1.135 thorpej NULL, "inet", "swcsum");
306 1.135 thorpej
307 1.135 thorpej #define INET_CSUM_COUNTER_INCR(ev) (ev)->ev_count++
308 1.135 thorpej
309 1.135 thorpej #else
310 1.135 thorpej
311 1.135 thorpej #define INET_CSUM_COUNTER_INCR(ev) /* nothing */
312 1.135 thorpej
313 1.135 thorpej #endif /* INET_CSUM_COUNTERS */
314 1.135 thorpej
315 1.1 cgd /*
316 1.1 cgd * We need to save the IP options in case a protocol wants to respond
317 1.1 cgd * to an incoming packet over the same route if the packet got here
318 1.1 cgd * using IP source routing. This allows connection establishment and
319 1.1 cgd * maintenance when the remote end is on a network that is not known
320 1.1 cgd * to us.
321 1.1 cgd */
322 1.1 cgd int ip_nhops = 0;
323 1.1 cgd static struct ip_srcrt {
324 1.1 cgd struct in_addr dst; /* final destination */
325 1.1 cgd char nop; /* one NOP to align */
326 1.1 cgd char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
327 1.1 cgd struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
328 1.1 cgd } ip_srcrt;
329 1.1 cgd
330 1.13 mycroft static void save_rte __P((u_char *, struct in_addr));
331 1.35 mycroft
332 1.164 matt #ifdef MBUFTRACE
333 1.164 matt struct mowner ip_rx_mowner = { "internet", "rx" };
334 1.164 matt struct mowner ip_tx_mowner = { "internet", "tx" };
335 1.164 matt #endif
336 1.164 matt
337 1.1 cgd /*
338 1.1 cgd * IP initialization: fill in IP protocol switch table.
339 1.1 cgd * All protocols not implemented in kernel go to raw IP protocol handler.
340 1.1 cgd */
341 1.8 mycroft void
342 1.1 cgd ip_init()
343 1.1 cgd {
344 1.109 augustss struct protosw *pr;
345 1.109 augustss int i;
346 1.1 cgd
347 1.166 matt pool_init(&inmulti_pool, sizeof(struct in_multi), 0, 0, 0, "inmltpl",
348 1.166 matt NULL);
349 1.72 thorpej pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
350 1.146 thorpej NULL);
351 1.72 thorpej
352 1.1 cgd pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
353 1.1 cgd if (pr == 0)
354 1.1 cgd panic("ip_init");
355 1.1 cgd for (i = 0; i < IPPROTO_MAX; i++)
356 1.1 cgd ip_protox[i] = pr - inetsw;
357 1.1 cgd for (pr = inetdomain.dom_protosw;
358 1.1 cgd pr < inetdomain.dom_protoswNPROTOSW; pr++)
359 1.1 cgd if (pr->pr_domain->dom_family == PF_INET &&
360 1.1 cgd pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
361 1.1 cgd ip_protox[pr->pr_protocol] = pr - inetsw;
362 1.25 cgd LIST_INIT(&ipq);
363 1.183 jonathan ip_id = time.tv_sec & 0xfffff;
364 1.1 cgd ipintrq.ifq_maxlen = ipqmaxlen;
365 1.181 jonathan TAILQ_INIT(&in_ifaddrhead);
366 1.120 ad in_ifaddrhashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IFADDR,
367 1.120 ad M_WAITOK, &in_ifaddrhash);
368 1.166 matt in_multihashtbl = hashinit(IN_IFADDR_HASH_SIZE, HASH_LIST, M_IPMADDR,
369 1.166 matt M_WAITOK, &in_multihash);
370 1.160 itojun ip_mtudisc_timeout_q = rt_timer_queue_create(ip_mtudisc_timeout);
371 1.73 thorpej #ifdef GATEWAY
372 1.73 thorpej ipflow_init();
373 1.73 thorpej #endif
374 1.121 thorpej
375 1.121 thorpej #ifdef PFIL_HOOKS
376 1.121 thorpej /* Register our Packet Filter hook. */
377 1.126 thorpej inet_pfil_hook.ph_type = PFIL_TYPE_AF;
378 1.126 thorpej inet_pfil_hook.ph_af = AF_INET;
379 1.121 thorpej i = pfil_head_register(&inet_pfil_hook);
380 1.121 thorpej if (i != 0)
381 1.121 thorpej printf("ip_init: WARNING: unable to register pfil hook, "
382 1.121 thorpej "error %d\n", i);
383 1.121 thorpej #endif /* PFIL_HOOKS */
384 1.135 thorpej
385 1.135 thorpej #ifdef INET_CSUM_COUNTERS
386 1.135 thorpej evcnt_attach_static(&ip_hwcsum_bad);
387 1.135 thorpej evcnt_attach_static(&ip_hwcsum_ok);
388 1.135 thorpej evcnt_attach_static(&ip_swcsum);
389 1.135 thorpej #endif /* INET_CSUM_COUNTERS */
390 1.164 matt
391 1.164 matt #ifdef MBUFTRACE
392 1.164 matt MOWNER_ATTACH(&ip_tx_mowner);
393 1.164 matt MOWNER_ATTACH(&ip_rx_mowner);
394 1.164 matt #endif /* MBUFTRACE */
395 1.1 cgd }
396 1.1 cgd
397 1.1 cgd struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
398 1.1 cgd struct route ipforward_rt;
399 1.1 cgd
400 1.1 cgd /*
401 1.89 itojun * IP software interrupt routine
402 1.89 itojun */
403 1.89 itojun void
404 1.89 itojun ipintr()
405 1.89 itojun {
406 1.89 itojun int s;
407 1.89 itojun struct mbuf *m;
408 1.89 itojun
409 1.89 itojun while (1) {
410 1.132 thorpej s = splnet();
411 1.89 itojun IF_DEQUEUE(&ipintrq, m);
412 1.89 itojun splx(s);
413 1.89 itojun if (m == 0)
414 1.89 itojun return;
415 1.164 matt MCLAIM(m, &ip_rx_mowner);
416 1.89 itojun ip_input(m);
417 1.89 itojun }
418 1.89 itojun }
419 1.89 itojun
420 1.89 itojun /*
421 1.1 cgd * Ip input routine. Checksum and byte swap header. If fragmented
422 1.1 cgd * try to reassemble. Process options. Pass to next level.
423 1.1 cgd */
424 1.8 mycroft void
425 1.89 itojun ip_input(struct mbuf *m)
426 1.1 cgd {
427 1.109 augustss struct ip *ip = NULL;
428 1.109 augustss struct ipq *fp;
429 1.109 augustss struct in_ifaddr *ia;
430 1.109 augustss struct ifaddr *ifa;
431 1.25 cgd struct ipqent *ipqe;
432 1.89 itojun int hlen = 0, mff, len;
433 1.100 itojun int downmatch;
434 1.165 christos int checkif;
435 1.169 itojun int srcrt = 0;
436 1.173 jonathan #ifdef FAST_IPSEC
437 1.173 jonathan struct m_tag *mtag;
438 1.173 jonathan struct tdb_ident *tdbi;
439 1.173 jonathan struct secpolicy *sp;
440 1.173 jonathan int s, error;
441 1.173 jonathan #endif /* FAST_IPSEC */
442 1.1 cgd
443 1.164 matt MCLAIM(m, &ip_rx_mowner);
444 1.1 cgd #ifdef DIAGNOSTIC
445 1.1 cgd if ((m->m_flags & M_PKTHDR) == 0)
446 1.1 cgd panic("ipintr no HDR");
447 1.89 itojun #endif
448 1.164 matt
449 1.1 cgd /*
450 1.1 cgd * If no IP addresses have been set yet but the interfaces
451 1.1 cgd * are receiving, can't do anything with incoming packets yet.
452 1.1 cgd */
453 1.181 jonathan if (TAILQ_FIRST(&in_ifaddrhead) == 0)
454 1.1 cgd goto bad;
455 1.1 cgd ipstat.ips_total++;
456 1.154 thorpej /*
457 1.154 thorpej * If the IP header is not aligned, slurp it up into a new
458 1.154 thorpej * mbuf with space for link headers, in the event we forward
459 1.154 thorpej * it. Otherwise, if it is aligned, make sure the entire
460 1.154 thorpej * base IP header is in the first mbuf of the chain.
461 1.154 thorpej */
462 1.154 thorpej if (IP_HDR_ALIGNED_P(mtod(m, caddr_t)) == 0) {
463 1.154 thorpej if ((m = m_copyup(m, sizeof(struct ip),
464 1.154 thorpej (max_linkhdr + 3) & ~3)) == NULL) {
465 1.154 thorpej /* XXXJRT new stat, please */
466 1.154 thorpej ipstat.ips_toosmall++;
467 1.154 thorpej return;
468 1.154 thorpej }
469 1.154 thorpej } else if (__predict_false(m->m_len < sizeof (struct ip))) {
470 1.154 thorpej if ((m = m_pullup(m, sizeof (struct ip))) == NULL) {
471 1.154 thorpej ipstat.ips_toosmall++;
472 1.154 thorpej return;
473 1.154 thorpej }
474 1.1 cgd }
475 1.1 cgd ip = mtod(m, struct ip *);
476 1.13 mycroft if (ip->ip_v != IPVERSION) {
477 1.13 mycroft ipstat.ips_badvers++;
478 1.13 mycroft goto bad;
479 1.13 mycroft }
480 1.1 cgd hlen = ip->ip_hl << 2;
481 1.1 cgd if (hlen < sizeof(struct ip)) { /* minimum header length */
482 1.1 cgd ipstat.ips_badhlen++;
483 1.1 cgd goto bad;
484 1.1 cgd }
485 1.1 cgd if (hlen > m->m_len) {
486 1.1 cgd if ((m = m_pullup(m, hlen)) == 0) {
487 1.1 cgd ipstat.ips_badhlen++;
488 1.89 itojun return;
489 1.1 cgd }
490 1.1 cgd ip = mtod(m, struct ip *);
491 1.1 cgd }
492 1.98 thorpej
493 1.85 hwr /*
494 1.99 thorpej * RFC1122: packets with a multicast source address are
495 1.98 thorpej * not allowed.
496 1.85 hwr */
497 1.85 hwr if (IN_MULTICAST(ip->ip_src.s_addr)) {
498 1.130 itojun ipstat.ips_badaddr++;
499 1.85 hwr goto bad;
500 1.129 itojun }
501 1.129 itojun
502 1.129 itojun /* 127/8 must not appear on wire - RFC1122 */
503 1.129 itojun if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
504 1.129 itojun (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
505 1.130 itojun if ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0) {
506 1.130 itojun ipstat.ips_badaddr++;
507 1.129 itojun goto bad;
508 1.130 itojun }
509 1.85 hwr }
510 1.85 hwr
511 1.135 thorpej switch (m->m_pkthdr.csum_flags &
512 1.137 thorpej ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_IPv4) |
513 1.135 thorpej M_CSUM_IPv4_BAD)) {
514 1.135 thorpej case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
515 1.135 thorpej INET_CSUM_COUNTER_INCR(&ip_hwcsum_bad);
516 1.135 thorpej goto badcsum;
517 1.135 thorpej
518 1.135 thorpej case M_CSUM_IPv4:
519 1.135 thorpej /* Checksum was okay. */
520 1.135 thorpej INET_CSUM_COUNTER_INCR(&ip_hwcsum_ok);
521 1.135 thorpej break;
522 1.135 thorpej
523 1.135 thorpej default:
524 1.135 thorpej /* Must compute it ourselves. */
525 1.135 thorpej INET_CSUM_COUNTER_INCR(&ip_swcsum);
526 1.135 thorpej if (in_cksum(m, hlen) != 0)
527 1.135 thorpej goto bad;
528 1.135 thorpej break;
529 1.1 cgd }
530 1.1 cgd
531 1.121 thorpej /* Retrieve the packet length. */
532 1.121 thorpej len = ntohs(ip->ip_len);
533 1.81 proff
534 1.81 proff /*
535 1.81 proff * Check for additional length bogosity
536 1.81 proff */
537 1.84 proff if (len < hlen) {
538 1.81 proff ipstat.ips_badlen++;
539 1.81 proff goto bad;
540 1.81 proff }
541 1.1 cgd
542 1.1 cgd /*
543 1.1 cgd * Check that the amount of data in the buffers
544 1.1 cgd * is at least as much as the IP header would have us expect.
545 1.1 cgd * Trim mbufs if longer than we expect.
546 1.1 cgd * Drop packet if shorter than we expect.
547 1.1 cgd */
548 1.35 mycroft if (m->m_pkthdr.len < len) {
549 1.1 cgd ipstat.ips_tooshort++;
550 1.1 cgd goto bad;
551 1.1 cgd }
552 1.35 mycroft if (m->m_pkthdr.len > len) {
553 1.1 cgd if (m->m_len == m->m_pkthdr.len) {
554 1.35 mycroft m->m_len = len;
555 1.35 mycroft m->m_pkthdr.len = len;
556 1.1 cgd } else
557 1.35 mycroft m_adj(m, len - m->m_pkthdr.len);
558 1.1 cgd }
559 1.1 cgd
560 1.94 itojun #ifdef IPSEC
561 1.149 wiz /* ipflow (IP fast forwarding) is not compatible with IPsec. */
562 1.94 itojun m->m_flags &= ~M_CANFASTFWD;
563 1.94 itojun #else
564 1.64 thorpej /*
565 1.64 thorpej * Assume that we can create a fast-forward IP flow entry
566 1.64 thorpej * based on this packet.
567 1.64 thorpej */
568 1.64 thorpej m->m_flags |= M_CANFASTFWD;
569 1.94 itojun #endif
570 1.64 thorpej
571 1.36 mrg #ifdef PFIL_HOOKS
572 1.33 mrg /*
573 1.64 thorpej * Run through list of hooks for input packets. If there are any
574 1.64 thorpej * filters which require that additional packets in the flow are
575 1.64 thorpej * not fast-forwarded, they must clear the M_CANFASTFWD flag.
576 1.64 thorpej * Note that filters must _never_ set this flag, as another filter
577 1.64 thorpej * in the list may have previously cleared it.
578 1.33 mrg */
579 1.127 itojun /*
580 1.127 itojun * let ipfilter look at packet on the wire,
581 1.127 itojun * not the decapsulated packet.
582 1.127 itojun */
583 1.127 itojun #ifdef IPSEC
584 1.136 itojun if (!ipsec_getnhist(m))
585 1.127 itojun #else
586 1.127 itojun if (1)
587 1.127 itojun #endif
588 1.127 itojun {
589 1.169 itojun struct in_addr odst;
590 1.169 itojun
591 1.169 itojun odst = ip->ip_dst;
592 1.127 itojun if (pfil_run_hooks(&inet_pfil_hook, &m, m->m_pkthdr.rcvif,
593 1.168 itojun PFIL_IN) != 0)
594 1.168 itojun return;
595 1.127 itojun if (m == NULL)
596 1.127 itojun return;
597 1.127 itojun ip = mtod(m, struct ip *);
598 1.142 darrenr hlen = ip->ip_hl << 2;
599 1.169 itojun srcrt = (odst.s_addr != ip->ip_dst.s_addr);
600 1.127 itojun }
601 1.36 mrg #endif /* PFIL_HOOKS */
602 1.123 thorpej
603 1.123 thorpej #ifdef ALTQ
604 1.123 thorpej /* XXX Temporary until ALTQ is changed to use a pfil hook */
605 1.123 thorpej if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) {
606 1.123 thorpej /* packet dropped by traffic conditioner */
607 1.123 thorpej return;
608 1.123 thorpej }
609 1.123 thorpej #endif
610 1.121 thorpej
611 1.121 thorpej /*
612 1.1 cgd * Process options and, if not destined for us,
613 1.1 cgd * ship it on. ip_dooptions returns 1 when an
614 1.1 cgd * error was detected (causing an icmp message
615 1.1 cgd * to be sent and the original packet to be freed).
616 1.1 cgd */
617 1.1 cgd ip_nhops = 0; /* for source routed packets */
618 1.1 cgd if (hlen > sizeof (struct ip) && ip_dooptions(m))
619 1.89 itojun return;
620 1.1 cgd
621 1.1 cgd /*
622 1.165 christos * Enable a consistency check between the destination address
623 1.165 christos * and the arrival interface for a unicast packet (the RFC 1122
624 1.165 christos * strong ES model) if IP forwarding is disabled and the packet
625 1.165 christos * is not locally generated.
626 1.165 christos *
627 1.165 christos * XXX - Checking also should be disabled if the destination
628 1.165 christos * address is ipnat'ed to a different interface.
629 1.165 christos *
630 1.165 christos * XXX - Checking is incompatible with IP aliases added
631 1.165 christos * to the loopback interface instead of the interface where
632 1.165 christos * the packets are received.
633 1.165 christos *
634 1.165 christos * XXX - We need to add a per ifaddr flag for this so that
635 1.165 christos * we get finer grain control.
636 1.165 christos */
637 1.165 christos checkif = ip_checkinterface && (ipforwarding == 0) &&
638 1.165 christos (m->m_pkthdr.rcvif != NULL) &&
639 1.165 christos ((m->m_pkthdr.rcvif->if_flags & IFF_LOOPBACK) == 0);
640 1.165 christos
641 1.165 christos /*
642 1.1 cgd * Check our list of addresses, to see if the packet is for us.
643 1.100 itojun *
644 1.100 itojun * Traditional 4.4BSD did not consult IFF_UP at all.
645 1.100 itojun * The behavior here is to treat addresses on !IFF_UP interface
646 1.100 itojun * as not mine.
647 1.1 cgd */
648 1.100 itojun downmatch = 0;
649 1.140 matt LIST_FOREACH(ia, &IN_IFADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
650 1.97 itojun if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
651 1.165 christos if (checkif && ia->ia_ifp != m->m_pkthdr.rcvif)
652 1.165 christos continue;
653 1.97 itojun if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
654 1.97 itojun break;
655 1.100 itojun else
656 1.100 itojun downmatch++;
657 1.97 itojun }
658 1.97 itojun }
659 1.86 thorpej if (ia != NULL)
660 1.86 thorpej goto ours;
661 1.57 tls if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
662 1.140 matt TAILQ_FOREACH(ifa, &m->m_pkthdr.rcvif->if_addrlist, ifa_list) {
663 1.140 matt if (ifa->ifa_addr->sa_family != AF_INET)
664 1.140 matt continue;
665 1.57 tls ia = ifatoia(ifa);
666 1.35 mycroft if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
667 1.35 mycroft in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
668 1.20 mycroft /*
669 1.20 mycroft * Look for all-0's host part (old broadcast addr),
670 1.20 mycroft * either for subnet or net.
671 1.20 mycroft */
672 1.20 mycroft ip->ip_dst.s_addr == ia->ia_subnet ||
673 1.18 mycroft ip->ip_dst.s_addr == ia->ia_net)
674 1.1 cgd goto ours;
675 1.57 tls /*
676 1.57 tls * An interface with IP address zero accepts
677 1.57 tls * all packets that arrive on that interface.
678 1.57 tls */
679 1.57 tls if (in_nullhost(ia->ia_addr.sin_addr))
680 1.57 tls goto ours;
681 1.1 cgd }
682 1.1 cgd }
683 1.18 mycroft if (IN_MULTICAST(ip->ip_dst.s_addr)) {
684 1.4 hpeyerl struct in_multi *inm;
685 1.4 hpeyerl #ifdef MROUTING
686 1.4 hpeyerl extern struct socket *ip_mrouter;
687 1.10 brezak
688 1.147 matt if (M_READONLY(m)) {
689 1.10 brezak if ((m = m_pullup(m, hlen)) == 0) {
690 1.10 brezak ipstat.ips_toosmall++;
691 1.89 itojun return;
692 1.10 brezak }
693 1.10 brezak ip = mtod(m, struct ip *);
694 1.10 brezak }
695 1.4 hpeyerl
696 1.4 hpeyerl if (ip_mrouter) {
697 1.4 hpeyerl /*
698 1.4 hpeyerl * If we are acting as a multicast router, all
699 1.4 hpeyerl * incoming multicast packets are passed to the
700 1.4 hpeyerl * kernel-level multicast forwarding function.
701 1.4 hpeyerl * The packet is returned (relatively) intact; if
702 1.4 hpeyerl * ip_mforward() returns a non-zero value, the packet
703 1.4 hpeyerl * must be discarded, else it may be accepted below.
704 1.4 hpeyerl *
705 1.4 hpeyerl * (The IP ident field is put in the same byte order
706 1.4 hpeyerl * as expected when ip_mforward() is called from
707 1.4 hpeyerl * ip_output().)
708 1.4 hpeyerl */
709 1.13 mycroft if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
710 1.13 mycroft ipstat.ips_cantforward++;
711 1.4 hpeyerl m_freem(m);
712 1.89 itojun return;
713 1.4 hpeyerl }
714 1.4 hpeyerl
715 1.4 hpeyerl /*
716 1.4 hpeyerl * The process-level routing demon needs to receive
717 1.4 hpeyerl * all multicast IGMP packets, whether or not this
718 1.4 hpeyerl * host belongs to their destination groups.
719 1.4 hpeyerl */
720 1.4 hpeyerl if (ip->ip_p == IPPROTO_IGMP)
721 1.4 hpeyerl goto ours;
722 1.13 mycroft ipstat.ips_forward++;
723 1.4 hpeyerl }
724 1.4 hpeyerl #endif
725 1.4 hpeyerl /*
726 1.4 hpeyerl * See if we belong to the destination multicast group on the
727 1.4 hpeyerl * arrival interface.
728 1.4 hpeyerl */
729 1.4 hpeyerl IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
730 1.4 hpeyerl if (inm == NULL) {
731 1.13 mycroft ipstat.ips_cantforward++;
732 1.4 hpeyerl m_freem(m);
733 1.89 itojun return;
734 1.4 hpeyerl }
735 1.4 hpeyerl goto ours;
736 1.4 hpeyerl }
737 1.19 mycroft if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
738 1.35 mycroft in_nullhost(ip->ip_dst))
739 1.1 cgd goto ours;
740 1.1 cgd
741 1.1 cgd /*
742 1.1 cgd * Not for us; forward if possible and desirable.
743 1.1 cgd */
744 1.1 cgd if (ipforwarding == 0) {
745 1.1 cgd ipstat.ips_cantforward++;
746 1.1 cgd m_freem(m);
747 1.100 itojun } else {
748 1.100 itojun /*
749 1.100 itojun * If ip_dst matched any of my address on !IFF_UP interface,
750 1.100 itojun * and there's no IFF_UP interface that matches ip_dst,
751 1.100 itojun * send icmp unreach. Forwarding it will result in in-kernel
752 1.100 itojun * forwarding loop till TTL goes to 0.
753 1.100 itojun */
754 1.100 itojun if (downmatch) {
755 1.100 itojun icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
756 1.100 itojun ipstat.ips_cantforward++;
757 1.100 itojun return;
758 1.100 itojun }
759 1.145 itojun #ifdef IPSEC
760 1.145 itojun if (ipsec4_in_reject(m, NULL)) {
761 1.145 itojun ipsecstat.in_polvio++;
762 1.145 itojun goto bad;
763 1.145 itojun }
764 1.145 itojun #endif
765 1.173 jonathan #ifdef FAST_IPSEC
766 1.173 jonathan mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
767 1.173 jonathan s = splsoftnet();
768 1.173 jonathan if (mtag != NULL) {
769 1.173 jonathan tdbi = (struct tdb_ident *)(mtag + 1);
770 1.173 jonathan sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
771 1.173 jonathan } else {
772 1.173 jonathan sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
773 1.173 jonathan IP_FORWARDING, &error);
774 1.173 jonathan }
775 1.173 jonathan if (sp == NULL) { /* NB: can happen if error */
776 1.173 jonathan splx(s);
777 1.173 jonathan /*XXX error stat???*/
778 1.173 jonathan DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/
779 1.173 jonathan goto bad;
780 1.173 jonathan }
781 1.173 jonathan
782 1.173 jonathan /*
783 1.173 jonathan * Check security policy against packet attributes.
784 1.173 jonathan */
785 1.173 jonathan error = ipsec_in_reject(sp, m);
786 1.173 jonathan KEY_FREESP(&sp);
787 1.173 jonathan splx(s);
788 1.173 jonathan if (error) {
789 1.173 jonathan ipstat.ips_cantforward++;
790 1.173 jonathan goto bad;
791 1.173 jonathan }
792 1.173 jonathan #endif /* FAST_IPSEC */
793 1.145 itojun
794 1.169 itojun ip_forward(m, srcrt);
795 1.100 itojun }
796 1.89 itojun return;
797 1.1 cgd
798 1.1 cgd ours:
799 1.1 cgd /*
800 1.1 cgd * If offset or IP_MF are set, must reassemble.
801 1.1 cgd * Otherwise, nothing need be done.
802 1.1 cgd * (We could look in the reassembly queue to see
803 1.1 cgd * if the packet was previously fragmented,
804 1.1 cgd * but it's not worth the time; just let them time out.)
805 1.1 cgd */
806 1.155 itojun if (ip->ip_off & ~htons(IP_DF|IP_RF)) {
807 1.155 itojun if (M_READONLY(m)) {
808 1.155 itojun if ((m = m_pullup(m, hlen)) == NULL) {
809 1.155 itojun ipstat.ips_toosmall++;
810 1.155 itojun goto bad;
811 1.155 itojun }
812 1.155 itojun ip = mtod(m, struct ip *);
813 1.155 itojun }
814 1.155 itojun
815 1.1 cgd /*
816 1.1 cgd * Look for queue of fragments
817 1.1 cgd * of this datagram.
818 1.1 cgd */
819 1.75 thorpej IPQ_LOCK();
820 1.140 matt LIST_FOREACH(fp, &ipq, ipq_q)
821 1.1 cgd if (ip->ip_id == fp->ipq_id &&
822 1.35 mycroft in_hosteq(ip->ip_src, fp->ipq_src) &&
823 1.35 mycroft in_hosteq(ip->ip_dst, fp->ipq_dst) &&
824 1.1 cgd ip->ip_p == fp->ipq_p)
825 1.1 cgd goto found;
826 1.1 cgd fp = 0;
827 1.1 cgd found:
828 1.1 cgd
829 1.1 cgd /*
830 1.1 cgd * Adjust ip_len to not reflect header,
831 1.25 cgd * set ipqe_mff if more fragments are expected,
832 1.1 cgd * convert offset of this to bytes.
833 1.1 cgd */
834 1.155 itojun ip->ip_len = htons(ntohs(ip->ip_len) - hlen);
835 1.155 itojun mff = (ip->ip_off & htons(IP_MF)) != 0;
836 1.25 cgd if (mff) {
837 1.16 cgd /*
838 1.16 cgd * Make sure that fragments have a data length
839 1.16 cgd * that's a non-zero multiple of 8 bytes.
840 1.16 cgd */
841 1.155 itojun if (ntohs(ip->ip_len) == 0 ||
842 1.155 itojun (ntohs(ip->ip_len) & 0x7) != 0) {
843 1.16 cgd ipstat.ips_badfrags++;
844 1.75 thorpej IPQ_UNLOCK();
845 1.16 cgd goto bad;
846 1.16 cgd }
847 1.16 cgd }
848 1.155 itojun ip->ip_off = htons((ntohs(ip->ip_off) & IP_OFFMASK) << 3);
849 1.1 cgd
850 1.1 cgd /*
851 1.1 cgd * If datagram marked as having more fragments
852 1.1 cgd * or if this is not the first fragment,
853 1.1 cgd * attempt reassembly; if it succeeds, proceed.
854 1.1 cgd */
855 1.155 itojun if (mff || ip->ip_off != htons(0)) {
856 1.1 cgd ipstat.ips_fragments++;
857 1.72 thorpej ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
858 1.25 cgd if (ipqe == NULL) {
859 1.25 cgd ipstat.ips_rcvmemdrop++;
860 1.75 thorpej IPQ_UNLOCK();
861 1.25 cgd goto bad;
862 1.25 cgd }
863 1.25 cgd ipqe->ipqe_mff = mff;
864 1.50 thorpej ipqe->ipqe_m = m;
865 1.25 cgd ipqe->ipqe_ip = ip;
866 1.50 thorpej m = ip_reass(ipqe, fp);
867 1.75 thorpej if (m == 0) {
868 1.75 thorpej IPQ_UNLOCK();
869 1.89 itojun return;
870 1.75 thorpej }
871 1.13 mycroft ipstat.ips_reassembled++;
872 1.50 thorpej ip = mtod(m, struct ip *);
873 1.74 thorpej hlen = ip->ip_hl << 2;
874 1.155 itojun ip->ip_len = htons(ntohs(ip->ip_len) + hlen);
875 1.1 cgd } else
876 1.1 cgd if (fp)
877 1.1 cgd ip_freef(fp);
878 1.75 thorpej IPQ_UNLOCK();
879 1.79 mycroft }
880 1.128 itojun
881 1.173 jonathan #if defined(IPSEC)
882 1.128 itojun /*
883 1.128 itojun * enforce IPsec policy checking if we are seeing last header.
884 1.128 itojun * note that we do not visit this with protocols with pcb layer
885 1.128 itojun * code - like udp/tcp/raw ip.
886 1.128 itojun */
887 1.128 itojun if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0 &&
888 1.128 itojun ipsec4_in_reject(m, NULL)) {
889 1.128 itojun ipsecstat.in_polvio++;
890 1.128 itojun goto bad;
891 1.128 itojun }
892 1.128 itojun #endif
893 1.173 jonathan #if FAST_IPSEC
894 1.173 jonathan /*
895 1.173 jonathan * enforce IPsec policy checking if we are seeing last header.
896 1.173 jonathan * note that we do not visit this with protocols with pcb layer
897 1.173 jonathan * code - like udp/tcp/raw ip.
898 1.173 jonathan */
899 1.173 jonathan if ((inetsw[ip_protox[ip->ip_p]].pr_flags & PR_LASTHDR) != 0) {
900 1.173 jonathan /*
901 1.173 jonathan * Check if the packet has already had IPsec processing
902 1.173 jonathan * done. If so, then just pass it along. This tag gets
903 1.173 jonathan * set during AH, ESP, etc. input handling, before the
904 1.173 jonathan * packet is returned to the ip input queue for delivery.
905 1.173 jonathan */
906 1.173 jonathan mtag = m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL);
907 1.173 jonathan s = splsoftnet();
908 1.173 jonathan if (mtag != NULL) {
909 1.173 jonathan tdbi = (struct tdb_ident *)(mtag + 1);
910 1.173 jonathan sp = ipsec_getpolicy(tdbi, IPSEC_DIR_INBOUND);
911 1.173 jonathan } else {
912 1.173 jonathan sp = ipsec_getpolicybyaddr(m, IPSEC_DIR_INBOUND,
913 1.173 jonathan IP_FORWARDING, &error);
914 1.173 jonathan }
915 1.173 jonathan if (sp != NULL) {
916 1.173 jonathan /*
917 1.173 jonathan * Check security policy against packet attributes.
918 1.173 jonathan */
919 1.173 jonathan error = ipsec_in_reject(sp, m);
920 1.173 jonathan KEY_FREESP(&sp);
921 1.173 jonathan } else {
922 1.173 jonathan /* XXX error stat??? */
923 1.173 jonathan error = EINVAL;
924 1.173 jonathan DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
925 1.173 jonathan goto bad;
926 1.173 jonathan }
927 1.173 jonathan splx(s);
928 1.173 jonathan if (error)
929 1.173 jonathan goto bad;
930 1.173 jonathan }
931 1.173 jonathan #endif /* FAST_IPSEC */
932 1.1 cgd
933 1.1 cgd /*
934 1.1 cgd * Switch out to protocol's input routine.
935 1.1 cgd */
936 1.82 aidan #if IFA_STATS
937 1.122 itojun if (ia && ip)
938 1.155 itojun ia->ia_ifa.ifa_data.ifad_inbytes += ntohs(ip->ip_len);
939 1.82 aidan #endif
940 1.1 cgd ipstat.ips_delivered++;
941 1.89 itojun {
942 1.89 itojun int off = hlen, nh = ip->ip_p;
943 1.89 itojun
944 1.89 itojun (*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
945 1.89 itojun return;
946 1.89 itojun }
947 1.1 cgd bad:
948 1.1 cgd m_freem(m);
949 1.135 thorpej return;
950 1.135 thorpej
951 1.135 thorpej badcsum:
952 1.135 thorpej ipstat.ips_badsum++;
953 1.135 thorpej m_freem(m);
954 1.1 cgd }
955 1.1 cgd
956 1.1 cgd /*
957 1.1 cgd * Take incoming datagram fragment and try to
958 1.1 cgd * reassemble it into whole datagram. If a chain for
959 1.1 cgd * reassembly of this datagram already exists, then it
960 1.1 cgd * is given as fp; otherwise have to make a chain.
961 1.1 cgd */
962 1.50 thorpej struct mbuf *
963 1.25 cgd ip_reass(ipqe, fp)
964 1.109 augustss struct ipqent *ipqe;
965 1.109 augustss struct ipq *fp;
966 1.1 cgd {
967 1.109 augustss struct mbuf *m = ipqe->ipqe_m;
968 1.109 augustss struct ipqent *nq, *p, *q;
969 1.25 cgd struct ip *ip;
970 1.1 cgd struct mbuf *t;
971 1.25 cgd int hlen = ipqe->ipqe_ip->ip_hl << 2;
972 1.1 cgd int i, next;
973 1.1 cgd
974 1.75 thorpej IPQ_LOCK_CHECK();
975 1.75 thorpej
976 1.1 cgd /*
977 1.1 cgd * Presence of header sizes in mbufs
978 1.1 cgd * would confuse code below.
979 1.1 cgd */
980 1.1 cgd m->m_data += hlen;
981 1.1 cgd m->m_len -= hlen;
982 1.1 cgd
983 1.1 cgd /*
984 1.1 cgd * If first fragment to arrive, create a reassembly queue.
985 1.1 cgd */
986 1.1 cgd if (fp == 0) {
987 1.131 itojun /*
988 1.131 itojun * Enforce upper bound on number of fragmented packets
989 1.131 itojun * for which we attempt reassembly;
990 1.131 itojun * If maxfrag is 0, never accept fragments.
991 1.131 itojun * If maxfrag is -1, accept all fragments without limitation.
992 1.131 itojun */
993 1.131 itojun if (ip_maxfragpackets < 0)
994 1.131 itojun ;
995 1.131 itojun else if (ip_nfragpackets >= ip_maxfragpackets)
996 1.131 itojun goto dropfrag;
997 1.131 itojun ip_nfragpackets++;
998 1.50 thorpej MALLOC(fp, struct ipq *, sizeof (struct ipq),
999 1.50 thorpej M_FTABLE, M_NOWAIT);
1000 1.50 thorpej if (fp == NULL)
1001 1.1 cgd goto dropfrag;
1002 1.25 cgd LIST_INSERT_HEAD(&ipq, fp, ipq_q);
1003 1.1 cgd fp->ipq_ttl = IPFRAGTTL;
1004 1.25 cgd fp->ipq_p = ipqe->ipqe_ip->ip_p;
1005 1.25 cgd fp->ipq_id = ipqe->ipqe_ip->ip_id;
1006 1.148 matt TAILQ_INIT(&fp->ipq_fragq);
1007 1.25 cgd fp->ipq_src = ipqe->ipqe_ip->ip_src;
1008 1.25 cgd fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
1009 1.25 cgd p = NULL;
1010 1.1 cgd goto insert;
1011 1.1 cgd }
1012 1.1 cgd
1013 1.1 cgd /*
1014 1.1 cgd * Find a segment which begins after this one does.
1015 1.1 cgd */
1016 1.148 matt for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1017 1.148 matt p = q, q = TAILQ_NEXT(q, ipqe_q))
1018 1.155 itojun if (ntohs(q->ipqe_ip->ip_off) > ntohs(ipqe->ipqe_ip->ip_off))
1019 1.1 cgd break;
1020 1.1 cgd
1021 1.1 cgd /*
1022 1.1 cgd * If there is a preceding segment, it may provide some of
1023 1.1 cgd * our data already. If so, drop the data from the incoming
1024 1.1 cgd * segment. If it provides all of our data, drop us.
1025 1.1 cgd */
1026 1.25 cgd if (p != NULL) {
1027 1.155 itojun i = ntohs(p->ipqe_ip->ip_off) + ntohs(p->ipqe_ip->ip_len) -
1028 1.155 itojun ntohs(ipqe->ipqe_ip->ip_off);
1029 1.1 cgd if (i > 0) {
1030 1.155 itojun if (i >= ntohs(ipqe->ipqe_ip->ip_len))
1031 1.1 cgd goto dropfrag;
1032 1.50 thorpej m_adj(ipqe->ipqe_m, i);
1033 1.155 itojun ipqe->ipqe_ip->ip_off =
1034 1.155 itojun htons(ntohs(ipqe->ipqe_ip->ip_off) + i);
1035 1.155 itojun ipqe->ipqe_ip->ip_len =
1036 1.155 itojun htons(ntohs(ipqe->ipqe_ip->ip_len) - i);
1037 1.1 cgd }
1038 1.1 cgd }
1039 1.1 cgd
1040 1.1 cgd /*
1041 1.1 cgd * While we overlap succeeding segments trim them or,
1042 1.1 cgd * if they are completely covered, dequeue them.
1043 1.1 cgd */
1044 1.155 itojun for (; q != NULL &&
1045 1.155 itojun ntohs(ipqe->ipqe_ip->ip_off) + ntohs(ipqe->ipqe_ip->ip_len) >
1046 1.155 itojun ntohs(q->ipqe_ip->ip_off); q = nq) {
1047 1.155 itojun i = (ntohs(ipqe->ipqe_ip->ip_off) +
1048 1.155 itojun ntohs(ipqe->ipqe_ip->ip_len)) - ntohs(q->ipqe_ip->ip_off);
1049 1.155 itojun if (i < ntohs(q->ipqe_ip->ip_len)) {
1050 1.155 itojun q->ipqe_ip->ip_len =
1051 1.155 itojun htons(ntohs(q->ipqe_ip->ip_len) - i);
1052 1.155 itojun q->ipqe_ip->ip_off =
1053 1.155 itojun htons(ntohs(q->ipqe_ip->ip_off) + i);
1054 1.50 thorpej m_adj(q->ipqe_m, i);
1055 1.1 cgd break;
1056 1.1 cgd }
1057 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1058 1.50 thorpej m_freem(q->ipqe_m);
1059 1.148 matt TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1060 1.72 thorpej pool_put(&ipqent_pool, q);
1061 1.1 cgd }
1062 1.1 cgd
1063 1.1 cgd insert:
1064 1.1 cgd /*
1065 1.1 cgd * Stick new segment in its place;
1066 1.1 cgd * check for complete reassembly.
1067 1.1 cgd */
1068 1.25 cgd if (p == NULL) {
1069 1.148 matt TAILQ_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
1070 1.25 cgd } else {
1071 1.148 matt TAILQ_INSERT_AFTER(&fp->ipq_fragq, p, ipqe, ipqe_q);
1072 1.25 cgd }
1073 1.1 cgd next = 0;
1074 1.148 matt for (p = NULL, q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL;
1075 1.148 matt p = q, q = TAILQ_NEXT(q, ipqe_q)) {
1076 1.155 itojun if (ntohs(q->ipqe_ip->ip_off) != next)
1077 1.1 cgd return (0);
1078 1.155 itojun next += ntohs(q->ipqe_ip->ip_len);
1079 1.1 cgd }
1080 1.25 cgd if (p->ipqe_mff)
1081 1.1 cgd return (0);
1082 1.1 cgd
1083 1.1 cgd /*
1084 1.41 thorpej * Reassembly is complete. Check for a bogus message size and
1085 1.41 thorpej * concatenate fragments.
1086 1.1 cgd */
1087 1.148 matt q = TAILQ_FIRST(&fp->ipq_fragq);
1088 1.25 cgd ip = q->ipqe_ip;
1089 1.41 thorpej if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
1090 1.41 thorpej ipstat.ips_toolong++;
1091 1.41 thorpej ip_freef(fp);
1092 1.41 thorpej return (0);
1093 1.41 thorpej }
1094 1.50 thorpej m = q->ipqe_m;
1095 1.1 cgd t = m->m_next;
1096 1.1 cgd m->m_next = 0;
1097 1.1 cgd m_cat(m, t);
1098 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1099 1.72 thorpej pool_put(&ipqent_pool, q);
1100 1.25 cgd for (q = nq; q != NULL; q = nq) {
1101 1.50 thorpej t = q->ipqe_m;
1102 1.148 matt nq = TAILQ_NEXT(q, ipqe_q);
1103 1.72 thorpej pool_put(&ipqent_pool, q);
1104 1.1 cgd m_cat(m, t);
1105 1.1 cgd }
1106 1.1 cgd
1107 1.1 cgd /*
1108 1.1 cgd * Create header for new ip packet by
1109 1.1 cgd * modifying header of first packet;
1110 1.1 cgd * dequeue and discard fragment reassembly header.
1111 1.1 cgd * Make header visible.
1112 1.1 cgd */
1113 1.155 itojun ip->ip_len = htons(next);
1114 1.25 cgd ip->ip_src = fp->ipq_src;
1115 1.25 cgd ip->ip_dst = fp->ipq_dst;
1116 1.25 cgd LIST_REMOVE(fp, ipq_q);
1117 1.50 thorpej FREE(fp, M_FTABLE);
1118 1.131 itojun ip_nfragpackets--;
1119 1.1 cgd m->m_len += (ip->ip_hl << 2);
1120 1.1 cgd m->m_data -= (ip->ip_hl << 2);
1121 1.1 cgd /* some debugging cruft by sklower, below, will go away soon */
1122 1.1 cgd if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
1123 1.109 augustss int plen = 0;
1124 1.50 thorpej for (t = m; t; t = t->m_next)
1125 1.50 thorpej plen += t->m_len;
1126 1.50 thorpej m->m_pkthdr.len = plen;
1127 1.1 cgd }
1128 1.50 thorpej return (m);
1129 1.1 cgd
1130 1.1 cgd dropfrag:
1131 1.1 cgd ipstat.ips_fragdropped++;
1132 1.1 cgd m_freem(m);
1133 1.72 thorpej pool_put(&ipqent_pool, ipqe);
1134 1.1 cgd return (0);
1135 1.1 cgd }
1136 1.1 cgd
1137 1.1 cgd /*
1138 1.1 cgd * Free a fragment reassembly header and all
1139 1.1 cgd * associated datagrams.
1140 1.1 cgd */
1141 1.8 mycroft void
1142 1.1 cgd ip_freef(fp)
1143 1.1 cgd struct ipq *fp;
1144 1.1 cgd {
1145 1.109 augustss struct ipqent *q, *p;
1146 1.1 cgd
1147 1.75 thorpej IPQ_LOCK_CHECK();
1148 1.75 thorpej
1149 1.148 matt for (q = TAILQ_FIRST(&fp->ipq_fragq); q != NULL; q = p) {
1150 1.148 matt p = TAILQ_NEXT(q, ipqe_q);
1151 1.50 thorpej m_freem(q->ipqe_m);
1152 1.148 matt TAILQ_REMOVE(&fp->ipq_fragq, q, ipqe_q);
1153 1.72 thorpej pool_put(&ipqent_pool, q);
1154 1.1 cgd }
1155 1.25 cgd LIST_REMOVE(fp, ipq_q);
1156 1.50 thorpej FREE(fp, M_FTABLE);
1157 1.131 itojun ip_nfragpackets--;
1158 1.1 cgd }
1159 1.1 cgd
1160 1.1 cgd /*
1161 1.1 cgd * IP timer processing;
1162 1.1 cgd * if a timer expires on a reassembly
1163 1.1 cgd * queue, discard it.
1164 1.1 cgd */
1165 1.8 mycroft void
1166 1.1 cgd ip_slowtimo()
1167 1.1 cgd {
1168 1.109 augustss struct ipq *fp, *nfp;
1169 1.24 mycroft int s = splsoftnet();
1170 1.1 cgd
1171 1.75 thorpej IPQ_LOCK();
1172 1.140 matt for (fp = LIST_FIRST(&ipq); fp != NULL; fp = nfp) {
1173 1.140 matt nfp = LIST_NEXT(fp, ipq_q);
1174 1.25 cgd if (--fp->ipq_ttl == 0) {
1175 1.1 cgd ipstat.ips_fragtimeout++;
1176 1.25 cgd ip_freef(fp);
1177 1.1 cgd }
1178 1.1 cgd }
1179 1.131 itojun /*
1180 1.131 itojun * If we are over the maximum number of fragments
1181 1.131 itojun * (due to the limit being lowered), drain off
1182 1.131 itojun * enough to get down to the new limit.
1183 1.131 itojun */
1184 1.131 itojun if (ip_maxfragpackets < 0)
1185 1.131 itojun ;
1186 1.131 itojun else {
1187 1.140 matt while (ip_nfragpackets > ip_maxfragpackets && LIST_FIRST(&ipq))
1188 1.140 matt ip_freef(LIST_FIRST(&ipq));
1189 1.131 itojun }
1190 1.75 thorpej IPQ_UNLOCK();
1191 1.63 matt #ifdef GATEWAY
1192 1.63 matt ipflow_slowtimo();
1193 1.63 matt #endif
1194 1.1 cgd splx(s);
1195 1.1 cgd }
1196 1.1 cgd
1197 1.1 cgd /*
1198 1.1 cgd * Drain off all datagram fragments.
1199 1.1 cgd */
1200 1.8 mycroft void
1201 1.1 cgd ip_drain()
1202 1.1 cgd {
1203 1.1 cgd
1204 1.75 thorpej /*
1205 1.75 thorpej * We may be called from a device's interrupt context. If
1206 1.75 thorpej * the ipq is already busy, just bail out now.
1207 1.75 thorpej */
1208 1.75 thorpej if (ipq_lock_try() == 0)
1209 1.75 thorpej return;
1210 1.75 thorpej
1211 1.140 matt while (LIST_FIRST(&ipq) != NULL) {
1212 1.1 cgd ipstat.ips_fragdropped++;
1213 1.140 matt ip_freef(LIST_FIRST(&ipq));
1214 1.1 cgd }
1215 1.75 thorpej
1216 1.75 thorpej IPQ_UNLOCK();
1217 1.1 cgd }
1218 1.1 cgd
1219 1.1 cgd /*
1220 1.1 cgd * Do option processing on a datagram,
1221 1.1 cgd * possibly discarding it if bad options are encountered,
1222 1.1 cgd * or forwarding it if source-routed.
1223 1.1 cgd * Returns 1 if packet has been forwarded/freed,
1224 1.1 cgd * 0 if the packet should be processed further.
1225 1.1 cgd */
/*
 * Review notes:
 *  - m holds the IP header; the options walked here are the bytes
 *    between the fixed header (ip + 1) and (ip_hl << 2).
 *  - Every "goto bad" ends in icmp_error(m, type, code, 0, 0) and bumps
 *    ipstat.ips_badoptions.  type starts as ICMP_PARAMPROB; in that case
 *    code is the byte offset of the offending field from the start of
 *    the IP header.  Some paths override type/code with ICMP_UNREACH
 *    values instead.
 *  - Options are rewritten in place (recorded addresses, timestamps,
 *    advanced pointers), and ip_dst is replaced by the next source-route
 *    hop.
 *  - ipaddr is not declared in this function; presumably a file-scope
 *    sockaddr_in scratch variable -- confirm against the top of the file.
 */
1226 1.8 mycroft int
1227 1.1 cgd ip_dooptions(m)
1228 1.1 cgd struct mbuf *m;
1229 1.1 cgd {
1230 1.109 augustss struct ip *ip = mtod(m, struct ip *);
1231 1.109 augustss u_char *cp, *cp0;
1232 1.109 augustss struct ip_timestamp *ipt;
1233 1.109 augustss struct in_ifaddr *ia;
1234 1.1 cgd int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
1235 1.104 thorpej struct in_addr dst;
1236 1.1 cgd n_time ntime;
1237 1.1 cgd
/* Remember the original destination; the source-route case may overwrite ip_dst. */
1238 1.13 mycroft dst = ip->ip_dst;
1239 1.1 cgd cp = (u_char *)(ip + 1);
1240 1.1 cgd cnt = (ip->ip_hl << 2) - sizeof (struct ip);
/* cnt is the number of option bytes left; each iteration consumes optlen of them. */
1241 1.1 cgd for (; cnt > 0; cnt -= optlen, cp += optlen) {
1242 1.1 cgd opt = cp[IPOPT_OPTVAL];
1243 1.1 cgd if (opt == IPOPT_EOL)
1244 1.1 cgd break;
1245 1.1 cgd if (opt == IPOPT_NOP)
1246 1.1 cgd optlen = 1;
1247 1.1 cgd else {
/*
 * Any other option carries a length byte: it must be present, must
 * cover at least the type and length bytes, and must fit in what is
 * left of the header.
 */
1248 1.113 itojun if (cnt < IPOPT_OLEN + sizeof(*cp)) {
1249 1.113 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1250 1.113 itojun goto bad;
1251 1.113 itojun }
1252 1.1 cgd optlen = cp[IPOPT_OLEN];
1253 1.114 itojun if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
1254 1.1 cgd code = &cp[IPOPT_OLEN] - (u_char *)ip;
1255 1.1 cgd goto bad;
1256 1.1 cgd }
1257 1.1 cgd }
1258 1.1 cgd switch (opt) {
1259 1.1 cgd
/* Unrecognised options are skipped without error. */
1260 1.1 cgd default:
1261 1.1 cgd break;
1262 1.1 cgd
1263 1.1 cgd /*
1264 1.1 cgd * Source routing with record.
1265 1.1 cgd * Find interface with current destination address.
1266 1.1 cgd * If none on this machine then drop if strictly routed,
1267 1.1 cgd * or do nothing if loosely routed.
1268 1.1 cgd * Record interface address and bring up next address
1269 1.1 cgd * component. If strictly routed make sure next
1270 1.1 cgd * address is on directly accessible net.
1271 1.1 cgd */
1272 1.1 cgd case IPOPT_LSRR:
1273 1.1 cgd case IPOPT_SSRR:
/* Source routing administratively disabled: reject with an ICMP unreachable. */
1274 1.47 cjs if (ip_allowsrcrt == 0) {
1275 1.47 cjs type = ICMP_UNREACH;
1276 1.47 cjs code = ICMP_UNREACH_NET_PROHIB;
1277 1.47 cjs goto bad;
1278 1.47 cjs }
/* The option must be long enough to contain its pointer byte. */
1279 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1280 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1281 1.114 itojun goto bad;
1282 1.114 itojun }
1283 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1284 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1285 1.1 cgd goto bad;
1286 1.1 cgd }
1287 1.1 cgd ipaddr.sin_addr = ip->ip_dst;
1288 1.19 mycroft ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1289 1.1 cgd if (ia == 0) {
1290 1.1 cgd if (opt == IPOPT_SSRR) {
1291 1.1 cgd type = ICMP_UNREACH;
1292 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1293 1.1 cgd goto bad;
1294 1.1 cgd }
1295 1.1 cgd /*
1296 1.1 cgd * Loose routing, and not at next destination
1297 1.1 cgd * yet; nothing to do except forward.
1298 1.1 cgd */
1299 1.1 cgd break;
1300 1.1 cgd }
1301 1.1 cgd off--; /* 0 origin */
1302 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen) {
1303 1.1 cgd /*
1304 1.1 cgd * End of source route. Should be for us.
1305 1.1 cgd */
1306 1.1 cgd save_rte(cp, ip->ip_src);
1307 1.1 cgd break;
1308 1.1 cgd }
1309 1.1 cgd /*
1310 1.1 cgd * locate outgoing interface
1311 1.1 cgd */
1312 1.1 cgd bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
1313 1.1 cgd sizeof(ipaddr.sin_addr));
1314 1.96 thorpej if (opt == IPOPT_SSRR)
1315 1.96 thorpej ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1316 1.96 thorpej else
1317 1.1 cgd ia = ip_rtaddr(ipaddr.sin_addr);
1318 1.1 cgd if (ia == 0) {
1319 1.1 cgd type = ICMP_UNREACH;
1320 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1321 1.1 cgd goto bad;
1322 1.1 cgd }
/*
 * Make the next hop the packet's destination, record the address of the
 * interface found above in the slot just consumed, and advance the
 * option's pointer past it.
 */
1323 1.1 cgd ip->ip_dst = ipaddr.sin_addr;
1324 1.20 mycroft bcopy((caddr_t)&ia->ia_addr.sin_addr,
1325 1.1 cgd (caddr_t)(cp + off), sizeof(struct in_addr));
1326 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1327 1.13 mycroft /*
1328 1.13 mycroft * Let ip_intr's mcast routing check handle mcast pkts
1329 1.13 mycroft */
1330 1.18 mycroft forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1331 1.1 cgd break;
1332 1.1 cgd
/* Record route: append the address of the interface toward ip_dst, if room remains. */
1333 1.1 cgd case IPOPT_RR:
1334 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1335 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1336 1.114 itojun goto bad;
1337 1.114 itojun }
1338 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1339 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1340 1.1 cgd goto bad;
1341 1.1 cgd }
1342 1.1 cgd /*
1343 1.1 cgd * If no space remains, ignore.
1344 1.1 cgd */
1345 1.1 cgd off--; /* 0 origin */
1346 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen)
1347 1.1 cgd break;
1348 1.1 cgd bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
1349 1.1 cgd sizeof(ipaddr.sin_addr));
1350 1.1 cgd /*
1351 1.1 cgd * locate outgoing interface; if we're the destination,
1352 1.1 cgd * use the incoming interface (should be same).
1353 1.1 cgd */
1354 1.96 thorpej if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
1355 1.96 thorpej == NULL &&
1356 1.96 thorpej (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
1357 1.1 cgd type = ICMP_UNREACH;
1358 1.1 cgd code = ICMP_UNREACH_HOST;
1359 1.1 cgd goto bad;
1360 1.1 cgd }
1361 1.20 mycroft bcopy((caddr_t)&ia->ia_addr.sin_addr,
1362 1.1 cgd (caddr_t)(cp + off), sizeof(struct in_addr));
1363 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1364 1.1 cgd break;
1365 1.1 cgd
/* Timestamp option: validate length and pointer, then write a timestamp (and possibly an address). */
1366 1.1 cgd case IPOPT_TS:
1367 1.1 cgd code = cp - (u_char *)ip;
1368 1.1 cgd ipt = (struct ip_timestamp *)cp;
1369 1.114 itojun if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
1370 1.114 itojun code = (u_char *)&ipt->ipt_len - (u_char *)ip;
1371 1.1 cgd goto bad;
1372 1.114 itojun }
1373 1.114 itojun if (ipt->ipt_ptr < 5) {
1374 1.114 itojun code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
1375 1.114 itojun goto bad;
1376 1.114 itojun }
/*
 * No room left for another timestamp: bump the overflow counter and
 * leave the option otherwise untouched.  If the counter itself wraps
 * to zero the packet is rejected.
 * NOTE(review): with a 1-based ipt_ptr this test also fires when
 * ipt_ptr == ipt_len - 3, i.e. when exactly sizeof(int32_t) bytes
 * remain -- confirm whether that conservative bound is intended.
 */
1377 1.15 cgd if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
1378 1.114 itojun if (++ipt->ipt_oflw == 0) {
1379 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1380 1.114 itojun (u_char *)ip;
1381 1.1 cgd goto bad;
1382 1.114 itojun }
1383 1.1 cgd break;
1384 1.1 cgd }
/* cp0 points at the slot that ipt_ptr (1-based) currently designates. */
1385 1.104 thorpej cp0 = (cp + ipt->ipt_ptr - 1);
1386 1.1 cgd switch (ipt->ipt_flg) {
1387 1.1 cgd
1388 1.1 cgd case IPOPT_TS_TSONLY:
1389 1.1 cgd break;
1390 1.1 cgd
1391 1.1 cgd case IPOPT_TS_TSANDADDR:
/* Need room for both an address and a timestamp. */
1392 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1393 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1394 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1395 1.114 itojun (u_char *)ip;
1396 1.1 cgd goto bad;
1397 1.114 itojun }
1398 1.13 mycroft ipaddr.sin_addr = dst;
1399 1.96 thorpej ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
1400 1.96 thorpej m->m_pkthdr.rcvif));
/*
 * "continue" (not "break") resumes the enclosing for loop: the
 * option is left unmodified and no timestamp is written.
 */
1401 1.13 mycroft if (ia == 0)
1402 1.13 mycroft continue;
1403 1.104 thorpej bcopy(&ia->ia_addr.sin_addr,
1404 1.104 thorpej cp0, sizeof(struct in_addr));
1405 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1406 1.1 cgd break;
1407 1.1 cgd
1408 1.1 cgd case IPOPT_TS_PRESPEC:
1409 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1410 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1411 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1412 1.114 itojun (u_char *)ip;
1413 1.1 cgd goto bad;
1414 1.114 itojun }
/* Only stamp if the prespecified address in the slot is one of ours. */
1415 1.104 thorpej bcopy(cp0, &ipaddr.sin_addr,
1416 1.1 cgd sizeof(struct in_addr));
1417 1.96 thorpej if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
1418 1.96 thorpej == NULL)
1419 1.1 cgd continue;
1420 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1421 1.1 cgd break;
1422 1.1 cgd
1423 1.1 cgd default:
1424 1.114 itojun /* XXX can't take &ipt->ipt_flg */
1425 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1426 1.114 itojun (u_char *)ip + 1;
1427 1.1 cgd goto bad;
1428 1.1 cgd }
/* Write the current time at the (possibly just advanced) pointer and step past it. */
1429 1.1 cgd ntime = iptime();
1430 1.107 thorpej cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
1431 1.107 thorpej bcopy(cp0, (caddr_t)cp + ipt->ipt_ptr - 1,
1432 1.1 cgd sizeof(n_time));
1433 1.1 cgd ipt->ipt_ptr += sizeof(n_time);
1434 1.1 cgd }
1435 1.1 cgd }
/*
 * forward is only set by the LSRR/SSRR case, when the new ip_dst is not
 * multicast.  Forwarding of source-routed packets can itself be
 * disabled through ip_forwsrcrt.
 */
1436 1.1 cgd if (forward) {
1437 1.26 thorpej if (ip_forwsrcrt == 0) {
1438 1.26 thorpej type = ICMP_UNREACH;
1439 1.26 thorpej code = ICMP_UNREACH_SRCFAIL;
1440 1.26 thorpej goto bad;
1441 1.26 thorpej }
1442 1.1 cgd ip_forward(m, 1);
1443 1.1 cgd return (1);
1444 1.13 mycroft }
1445 1.13 mycroft return (0);
1446 1.1 cgd bad:
1447 1.13 mycroft icmp_error(m, type, code, 0, 0);
1448 1.13 mycroft ipstat.ips_badoptions++;
1449 1.1 cgd return (1);
1450 1.1 cgd }
1451 1.1 cgd
1452 1.1 cgd /*
1453 1.1 cgd * Given address of next destination (final or next hop),
1454 1.1 cgd * return internet address info of interface to be used to get there.
1455 1.1 cgd */
1456 1.1 cgd struct in_ifaddr *
1457 1.1 cgd ip_rtaddr(dst)
1458 1.1 cgd struct in_addr dst;
1459 1.1 cgd {
1460 1.109 augustss struct sockaddr_in *sin;
1461 1.1 cgd
1462 1.19 mycroft sin = satosin(&ipforward_rt.ro_dst);
1463 1.1 cgd
1464 1.35 mycroft if (ipforward_rt.ro_rt == 0 || !in_hosteq(dst, sin->sin_addr)) {
1465 1.1 cgd if (ipforward_rt.ro_rt) {
1466 1.1 cgd RTFREE(ipforward_rt.ro_rt);
1467 1.1 cgd ipforward_rt.ro_rt = 0;
1468 1.1 cgd }
1469 1.1 cgd sin->sin_family = AF_INET;
1470 1.1 cgd sin->sin_len = sizeof(*sin);
1471 1.1 cgd sin->sin_addr = dst;
1472 1.1 cgd
1473 1.1 cgd rtalloc(&ipforward_rt);
1474 1.1 cgd }
1475 1.1 cgd if (ipforward_rt.ro_rt == 0)
1476 1.1 cgd return ((struct in_ifaddr *)0);
1477 1.19 mycroft return (ifatoia(ipforward_rt.ro_rt->rt_ifa));
1478 1.1 cgd }
1479 1.1 cgd
1480 1.1 cgd /*
1481 1.1 cgd * Save incoming source route for use in replies,
1482 1.1 cgd * to be picked up later by ip_srcroute if the receiver is interested.
1483 1.1 cgd */
1484 1.13 mycroft void
1485 1.1 cgd save_rte(option, dst)
1486 1.1 cgd u_char *option;
1487 1.1 cgd struct in_addr dst;
1488 1.1 cgd {
1489 1.1 cgd unsigned olen;
1490 1.1 cgd
1491 1.1 cgd olen = option[IPOPT_OLEN];
1492 1.1 cgd #ifdef DIAGNOSTIC
1493 1.1 cgd if (ipprintfs)
1494 1.39 christos printf("save_rte: olen %d\n", olen);
1495 1.89 itojun #endif /* 0 */
1496 1.1 cgd if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1497 1.1 cgd return;
1498 1.1 cgd bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
1499 1.1 cgd ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1500 1.1 cgd ip_srcrt.dst = dst;
1501 1.1 cgd }
1502 1.1 cgd
1503 1.1 cgd /*
1504 1.1 cgd * Retrieve incoming source route for use in replies,
1505 1.1 cgd * in the same form used by setsockopt.
1506 1.1 cgd * The first hop is placed before the options, will be removed later.
1507 1.1 cgd */
1508 1.1 cgd struct mbuf *
1509 1.1 cgd ip_srcroute()
1510 1.1 cgd {
1511 1.109 augustss struct in_addr *p, *q;
1512 1.109 augustss struct mbuf *m;
1513 1.1 cgd
1514 1.1 cgd if (ip_nhops == 0)
1515 1.1 cgd return ((struct mbuf *)0);
1516 1.1 cgd m = m_get(M_DONTWAIT, MT_SOOPTS);
1517 1.1 cgd if (m == 0)
1518 1.1 cgd return ((struct mbuf *)0);
1519 1.1 cgd
1520 1.164 matt MCLAIM(m, &inetdomain.dom_mowner);
1521 1.13 mycroft #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1522 1.1 cgd
1523 1.1 cgd /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1524 1.1 cgd m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1525 1.1 cgd OPTSIZ;
1526 1.1 cgd #ifdef DIAGNOSTIC
1527 1.1 cgd if (ipprintfs)
1528 1.39 christos printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1529 1.1 cgd #endif
1530 1.1 cgd
1531 1.1 cgd /*
1532 1.1 cgd * First save first hop for return route
1533 1.1 cgd */
1534 1.1 cgd p = &ip_srcrt.route[ip_nhops - 1];
1535 1.1 cgd *(mtod(m, struct in_addr *)) = *p--;
1536 1.1 cgd #ifdef DIAGNOSTIC
1537 1.1 cgd if (ipprintfs)
1538 1.39 christos printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
1539 1.1 cgd #endif
1540 1.1 cgd
1541 1.1 cgd /*
1542 1.1 cgd * Copy option fields and padding (nop) to mbuf.
1543 1.1 cgd */
1544 1.1 cgd ip_srcrt.nop = IPOPT_NOP;
1545 1.1 cgd ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1546 1.1 cgd bcopy((caddr_t)&ip_srcrt.nop,
1547 1.1 cgd mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
1548 1.1 cgd q = (struct in_addr *)(mtod(m, caddr_t) +
1549 1.1 cgd sizeof(struct in_addr) + OPTSIZ);
1550 1.1 cgd #undef OPTSIZ
1551 1.1 cgd /*
1552 1.1 cgd * Record return path as an IP source route,
1553 1.1 cgd * reversing the path (pointers are now aligned).
1554 1.1 cgd */
1555 1.1 cgd while (p >= ip_srcrt.route) {
1556 1.1 cgd #ifdef DIAGNOSTIC
1557 1.1 cgd if (ipprintfs)
1558 1.39 christos printf(" %x", ntohl(q->s_addr));
1559 1.1 cgd #endif
1560 1.1 cgd *q++ = *p--;
1561 1.1 cgd }
1562 1.1 cgd /*
1563 1.1 cgd * Last hop goes to final destination.
1564 1.1 cgd */
1565 1.1 cgd *q = ip_srcrt.dst;
1566 1.1 cgd #ifdef DIAGNOSTIC
1567 1.1 cgd if (ipprintfs)
1568 1.39 christos printf(" %x\n", ntohl(q->s_addr));
1569 1.1 cgd #endif
1570 1.1 cgd return (m);
1571 1.1 cgd }
1572 1.1 cgd
1573 1.1 cgd /*
1574 1.1 cgd * Strip out IP options, at higher
1575 1.1 cgd * level protocol in the kernel.
1576 1.1 cgd * Second argument is buffer to which options
1577 1.1 cgd * will be moved, and return value is their length.
1578 1.1 cgd * XXX should be deleted; last arg currently ignored.
1579 1.1 cgd */
1580 1.8 mycroft void
1581 1.1 cgd ip_stripoptions(m, mopt)
1582 1.109 augustss struct mbuf *m;
1583 1.1 cgd struct mbuf *mopt;
1584 1.1 cgd {
1585 1.109 augustss int i;
1586 1.1 cgd struct ip *ip = mtod(m, struct ip *);
1587 1.109 augustss caddr_t opts;
1588 1.1 cgd int olen;
1589 1.1 cgd
1590 1.79 mycroft olen = (ip->ip_hl << 2) - sizeof (struct ip);
1591 1.1 cgd opts = (caddr_t)(ip + 1);
1592 1.1 cgd i = m->m_len - (sizeof (struct ip) + olen);
1593 1.1 cgd bcopy(opts + olen, opts, (unsigned)i);
1594 1.1 cgd m->m_len -= olen;
1595 1.1 cgd if (m->m_flags & M_PKTHDR)
1596 1.1 cgd m->m_pkthdr.len -= olen;
1597 1.155 itojun ip->ip_len = htons(ntohs(ip->ip_len) - olen);
1598 1.79 mycroft ip->ip_hl = sizeof (struct ip) >> 2;
1599 1.1 cgd }
1600 1.1 cgd
/*
 * Table of errno values with PRC_NCMDS slots (presumably indexed by
 * PRC_* control command code -- confirm against the users of this
 * table).  Twenty-one initializers are listed; any remaining slots are
 * implicitly zero.
 */
const int inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	0,			/* 18 */
	0,			/* 19 */
	ENOPROTOOPT		/* 20 */
};
1609 1.1 cgd
1610 1.1 cgd /*
1611 1.1 cgd * Forward a packet. If some error occurs return the sender
1612 1.1 cgd * an icmp packet. Note we can't always generate a meaningful
1613 1.1 cgd * icmp message because icmp doesn't have a large enough repertoire
1614 1.1 cgd * of codes and types.
1615 1.1 cgd *
1616 1.1 cgd * If not forwarding, just drop the packet. This could be confusing
1617 1.1 cgd * if ipforwarding was zero but some routing protocol was advancing
1618 1.1 cgd * us as a gateway to somewhere. However, we must let the routing
1619 1.1 cgd * protocol deal with that.
1620 1.1 cgd *
1621 1.1 cgd * The srcrt parameter indicates whether the packet is being forwarded
1622 1.1 cgd * via a source route.
1623 1.1 cgd */
1624 1.13 mycroft void
1625 1.1 cgd ip_forward(m, srcrt)
1626 1.1 cgd struct mbuf *m;
1627 1.1 cgd int srcrt;
1628 1.1 cgd {
1629 1.109 augustss struct ip *ip = mtod(m, struct ip *);
1630 1.109 augustss struct sockaddr_in *sin;
1631 1.109 augustss struct rtentry *rt;
1632 1.28 christos int error, type = 0, code = 0;
1633 1.1 cgd struct mbuf *mcopy;
1634 1.13 mycroft n_long dest;
1635 1.13 mycroft struct ifnet *destifp;
1636 1.173 jonathan #if defined(IPSEC) || defined(FAST_IPSEC)
1637 1.89 itojun struct ifnet dummyifp;
1638 1.89 itojun #endif
1639 1.164 matt
1640 1.164 matt /*
1641 1.164 matt * We are now in the output path.
1642 1.164 matt */
1643 1.164 matt MCLAIM(m, &ip_tx_mowner);
1644 1.135 thorpej
1645 1.135 thorpej /*
1646 1.135 thorpej * Clear any in-bound checksum flags for this packet.
1647 1.135 thorpej */
1648 1.135 thorpej m->m_pkthdr.csum_flags = 0;
1649 1.1 cgd
1650 1.13 mycroft dest = 0;
1651 1.1 cgd #ifdef DIAGNOSTIC
1652 1.1 cgd if (ipprintfs)
1653 1.70 thorpej printf("forward: src %2.2x dst %2.2x ttl %x\n",
1654 1.70 thorpej ntohl(ip->ip_src.s_addr),
1655 1.70 thorpej ntohl(ip->ip_dst.s_addr), ip->ip_ttl);
1656 1.1 cgd #endif
1657 1.93 sommerfe if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1658 1.1 cgd ipstat.ips_cantforward++;
1659 1.1 cgd m_freem(m);
1660 1.1 cgd return;
1661 1.1 cgd }
1662 1.1 cgd if (ip->ip_ttl <= IPTTLDEC) {
1663 1.13 mycroft icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1664 1.1 cgd return;
1665 1.1 cgd }
1666 1.1 cgd ip->ip_ttl -= IPTTLDEC;
1667 1.1 cgd
1668 1.19 mycroft sin = satosin(&ipforward_rt.ro_dst);
1669 1.1 cgd if ((rt = ipforward_rt.ro_rt) == 0 ||
1670 1.35 mycroft !in_hosteq(ip->ip_dst, sin->sin_addr)) {
1671 1.1 cgd if (ipforward_rt.ro_rt) {
1672 1.1 cgd RTFREE(ipforward_rt.ro_rt);
1673 1.1 cgd ipforward_rt.ro_rt = 0;
1674 1.1 cgd }
1675 1.1 cgd sin->sin_family = AF_INET;
1676 1.35 mycroft sin->sin_len = sizeof(struct sockaddr_in);
1677 1.1 cgd sin->sin_addr = ip->ip_dst;
1678 1.1 cgd
1679 1.1 cgd rtalloc(&ipforward_rt);
1680 1.1 cgd if (ipforward_rt.ro_rt == 0) {
1681 1.13 mycroft icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1682 1.1 cgd return;
1683 1.1 cgd }
1684 1.1 cgd rt = ipforward_rt.ro_rt;
1685 1.1 cgd }
1686 1.1 cgd
1687 1.1 cgd /*
1688 1.34 mycroft * Save at most 68 bytes of the packet in case
1689 1.1 cgd * we need to generate an ICMP message to the src.
1690 1.119 itojun * Pullup to avoid sharing mbuf cluster between m and mcopy.
1691 1.1 cgd */
1692 1.155 itojun mcopy = m_copym(m, 0, imin(ntohs(ip->ip_len), 68), M_DONTWAIT);
1693 1.119 itojun if (mcopy)
1694 1.119 itojun mcopy = m_pullup(mcopy, ip->ip_hl << 2);
1695 1.1 cgd
1696 1.1 cgd /*
1697 1.1 cgd * If forwarding packet using same interface that it came in on,
1698 1.1 cgd * perhaps should send a redirect to sender to shortcut a hop.
1699 1.1 cgd * Only send redirect if source is sending directly to us,
1700 1.1 cgd * and if packet was not source routed (or has any options).
1701 1.1 cgd * Also, don't send redirect if forwarding using a default route
1702 1.1 cgd * or a route modified by a redirect.
1703 1.1 cgd */
1704 1.1 cgd if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1705 1.1 cgd (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1706 1.35 mycroft !in_nullhost(satosin(rt_key(rt))->sin_addr) &&
1707 1.1 cgd ipsendredirects && !srcrt) {
1708 1.19 mycroft if (rt->rt_ifa &&
1709 1.19 mycroft (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1710 1.19 mycroft ifatoia(rt->rt_ifa)->ia_subnet) {
1711 1.77 thorpej if (rt->rt_flags & RTF_GATEWAY)
1712 1.77 thorpej dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1713 1.77 thorpej else
1714 1.77 thorpej dest = ip->ip_dst.s_addr;
1715 1.77 thorpej /*
1716 1.77 thorpej * Router requirements says to only send host
1717 1.77 thorpej * redirects.
1718 1.77 thorpej */
1719 1.77 thorpej type = ICMP_REDIRECT;
1720 1.77 thorpej code = ICMP_REDIRECT_HOST;
1721 1.1 cgd #ifdef DIAGNOSTIC
1722 1.77 thorpej if (ipprintfs)
1723 1.77 thorpej printf("redirect (%d) to %x\n", code,
1724 1.77 thorpej (u_int32_t)dest);
1725 1.1 cgd #endif
1726 1.1 cgd }
1727 1.1 cgd }
1728 1.1 cgd
1729 1.27 thorpej error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1730 1.173 jonathan (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)),
1731 1.174 itojun (struct ip_moptions *)NULL, (struct socket *)NULL);
1732 1.173 jonathan
1733 1.1 cgd if (error)
1734 1.1 cgd ipstat.ips_cantforward++;
1735 1.1 cgd else {
1736 1.1 cgd ipstat.ips_forward++;
1737 1.1 cgd if (type)
1738 1.1 cgd ipstat.ips_redirectsent++;
1739 1.1 cgd else {
1740 1.63 matt if (mcopy) {
1741 1.63 matt #ifdef GATEWAY
1742 1.64 thorpej if (mcopy->m_flags & M_CANFASTFWD)
1743 1.64 thorpej ipflow_create(&ipforward_rt, mcopy);
1744 1.63 matt #endif
1745 1.1 cgd m_freem(mcopy);
1746 1.63 matt }
1747 1.1 cgd return;
1748 1.1 cgd }
1749 1.1 cgd }
1750 1.1 cgd if (mcopy == NULL)
1751 1.1 cgd return;
1752 1.13 mycroft destifp = NULL;
1753 1.13 mycroft
1754 1.1 cgd switch (error) {
1755 1.1 cgd
1756 1.1 cgd case 0: /* forwarded, but need redirect */
1757 1.1 cgd /* type, code set above */
1758 1.1 cgd break;
1759 1.1 cgd
1760 1.1 cgd case ENETUNREACH: /* shouldn't happen, checked above */
1761 1.1 cgd case EHOSTUNREACH:
1762 1.1 cgd case ENETDOWN:
1763 1.1 cgd case EHOSTDOWN:
1764 1.1 cgd default:
1765 1.1 cgd type = ICMP_UNREACH;
1766 1.1 cgd code = ICMP_UNREACH_HOST;
1767 1.1 cgd break;
1768 1.1 cgd
1769 1.1 cgd case EMSGSIZE:
1770 1.1 cgd type = ICMP_UNREACH;
1771 1.1 cgd code = ICMP_UNREACH_NEEDFRAG;
1772 1.173 jonathan #if !defined(IPSEC) && !defined(FAST_IPSEC)
1773 1.13 mycroft if (ipforward_rt.ro_rt)
1774 1.13 mycroft destifp = ipforward_rt.ro_rt->rt_ifp;
1775 1.89 itojun #else
1776 1.89 itojun /*
1777 1.89 itojun * If the packet is routed over IPsec tunnel, tell the
1778 1.89 itojun * originator the tunnel MTU.
1779 1.89 itojun * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1780 1.89 itojun * XXX quickhack!!!
1781 1.89 itojun */
1782 1.89 itojun if (ipforward_rt.ro_rt) {
1783 1.89 itojun struct secpolicy *sp;
1784 1.89 itojun int ipsecerror;
1785 1.95 itojun size_t ipsechdr;
1786 1.89 itojun struct route *ro;
1787 1.89 itojun
1788 1.89 itojun sp = ipsec4_getpolicybyaddr(mcopy,
1789 1.170 itojun IPSEC_DIR_OUTBOUND, IP_FORWARDING,
1790 1.170 itojun &ipsecerror);
1791 1.89 itojun
1792 1.89 itojun if (sp == NULL)
1793 1.89 itojun destifp = ipforward_rt.ro_rt->rt_ifp;
1794 1.89 itojun else {
1795 1.89 itojun /* count IPsec header size */
1796 1.95 itojun ipsechdr = ipsec4_hdrsiz(mcopy,
1797 1.170 itojun IPSEC_DIR_OUTBOUND, NULL);
1798 1.89 itojun
1799 1.89 itojun /*
1800 1.89 itojun * find the correct route for outer IPv4
1801 1.89 itojun * header, compute tunnel MTU.
1802 1.89 itojun *
1803 1.89 itojun * XXX BUG ALERT
1804 1.89 itojun * The "dummyifp" code relies upon the fact
1805 1.89 itojun * that icmp_error() touches only ifp->if_mtu.
1806 1.89 itojun */
1807 1.89 itojun /*XXX*/
1808 1.89 itojun destifp = NULL;
1809 1.89 itojun if (sp->req != NULL
1810 1.95 itojun && sp->req->sav != NULL
1811 1.95 itojun && sp->req->sav->sah != NULL) {
1812 1.95 itojun ro = &sp->req->sav->sah->sa_route;
1813 1.89 itojun if (ro->ro_rt && ro->ro_rt->rt_ifp) {
1814 1.89 itojun dummyifp.if_mtu =
1815 1.151 itojun ro->ro_rt->rt_rmx.rmx_mtu ?
1816 1.151 itojun ro->ro_rt->rt_rmx.rmx_mtu :
1817 1.89 itojun ro->ro_rt->rt_ifp->if_mtu;
1818 1.89 itojun dummyifp.if_mtu -= ipsechdr;
1819 1.89 itojun destifp = &dummyifp;
1820 1.89 itojun }
1821 1.89 itojun }
1822 1.89 itojun
1823 1.173 jonathan #ifdef IPSEC
1824 1.89 itojun key_freesp(sp);
1825 1.173 jonathan #else
1826 1.173 jonathan KEY_FREESP(&sp);
1827 1.173 jonathan #endif
1828 1.89 itojun }
1829 1.89 itojun }
1830 1.89 itojun #endif /*IPSEC*/
1831 1.1 cgd ipstat.ips_cantfrag++;
1832 1.1 cgd break;
1833 1.1 cgd
1834 1.1 cgd case ENOBUFS:
1835 1.143 itojun #if 1
1836 1.143 itojun /*
1837 1.143 itojun * a router should not generate ICMP_SOURCEQUENCH as
1838 1.143 itojun * required in RFC1812 Requirements for IP Version 4 Routers.
1839 1.143 itojun * source quench could be a big problem under DoS attacks,
1840 1.149 wiz * or if the underlying interface is rate-limited.
1841 1.143 itojun */
1842 1.143 itojun if (mcopy)
1843 1.143 itojun m_freem(mcopy);
1844 1.143 itojun return;
1845 1.143 itojun #else
1846 1.1 cgd type = ICMP_SOURCEQUENCH;
1847 1.1 cgd code = 0;
1848 1.1 cgd break;
1849 1.143 itojun #endif
1850 1.1 cgd }
1851 1.13 mycroft icmp_error(mcopy, type, code, dest, destifp);
1852 1.44 thorpej }
1853 1.44 thorpej
1854 1.44 thorpej void
1855 1.44 thorpej ip_savecontrol(inp, mp, ip, m)
1856 1.109 augustss struct inpcb *inp;
1857 1.109 augustss struct mbuf **mp;
1858 1.109 augustss struct ip *ip;
1859 1.109 augustss struct mbuf *m;
1860 1.44 thorpej {
1861 1.44 thorpej
1862 1.44 thorpej if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1863 1.44 thorpej struct timeval tv;
1864 1.44 thorpej
1865 1.44 thorpej microtime(&tv);
1866 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1867 1.44 thorpej SCM_TIMESTAMP, SOL_SOCKET);
1868 1.44 thorpej if (*mp)
1869 1.44 thorpej mp = &(*mp)->m_next;
1870 1.44 thorpej }
1871 1.44 thorpej if (inp->inp_flags & INP_RECVDSTADDR) {
1872 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
1873 1.44 thorpej sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
1874 1.44 thorpej if (*mp)
1875 1.44 thorpej mp = &(*mp)->m_next;
1876 1.44 thorpej }
1877 1.44 thorpej #ifdef notyet
1878 1.44 thorpej /*
1879 1.44 thorpej * XXX
1880 1.44 thorpej * Moving these out of udp_input() made them even more broken
1881 1.44 thorpej * than they already were.
1882 1.44 thorpej * - fenner (at) parc.xerox.com
1883 1.44 thorpej */
1884 1.44 thorpej /* options were tossed already */
1885 1.44 thorpej if (inp->inp_flags & INP_RECVOPTS) {
1886 1.44 thorpej *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
1887 1.44 thorpej sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
1888 1.44 thorpej if (*mp)
1889 1.44 thorpej mp = &(*mp)->m_next;
1890 1.44 thorpej }
1891 1.44 thorpej /* ip_srcroute doesn't do what we want here, need to fix */
1892 1.44 thorpej if (inp->inp_flags & INP_RECVRETOPTS) {
1893 1.44 thorpej *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
1894 1.44 thorpej sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
1895 1.44 thorpej if (*mp)
1896 1.44 thorpej mp = &(*mp)->m_next;
1897 1.44 thorpej }
1898 1.44 thorpej #endif
1899 1.44 thorpej if (inp->inp_flags & INP_RECVIF) {
1900 1.44 thorpej struct sockaddr_dl sdl;
1901 1.44 thorpej
1902 1.44 thorpej sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
1903 1.44 thorpej sdl.sdl_family = AF_LINK;
1904 1.44 thorpej sdl.sdl_index = m->m_pkthdr.rcvif ?
1905 1.44 thorpej m->m_pkthdr.rcvif->if_index : 0;
1906 1.44 thorpej sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
1907 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
1908 1.44 thorpej IP_RECVIF, IPPROTO_IP);
1909 1.44 thorpej if (*mp)
1910 1.44 thorpej mp = &(*mp)->m_next;
1911 1.44 thorpej }
1912 1.13 mycroft }
1913 1.13 mycroft
1914 1.13 mycroft int
1915 1.13 mycroft ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1916 1.13 mycroft int *name;
1917 1.13 mycroft u_int namelen;
1918 1.13 mycroft void *oldp;
1919 1.13 mycroft size_t *oldlenp;
1920 1.13 mycroft void *newp;
1921 1.13 mycroft size_t newlen;
1922 1.13 mycroft {
1923 1.88 sommerfe extern int subnetsarelocal, hostzeroisbroadcast;
1924 1.52 thorpej
1925 1.54 lukem int error, old;
1926 1.54 lukem
1927 1.180 jonathan /* All sysctl names (except ifq.*) at this level are terminal. */
1928 1.180 jonathan if ((namelen != 1) && !(namelen == 2 && name[0] == IPCTL_IFQ))
1929 1.180 jonathan return (ENOTDIR);
1930 1.13 mycroft
1931 1.13 mycroft switch (name[0]) {
1932 1.13 mycroft case IPCTL_FORWARDING:
1933 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1934 1.13 mycroft case IPCTL_SENDREDIRECTS:
1935 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen,
1936 1.13 mycroft &ipsendredirects));
1937 1.13 mycroft case IPCTL_DEFTTL:
1938 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1939 1.13 mycroft #ifdef notyet
1940 1.13 mycroft case IPCTL_DEFMTU:
1941 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1942 1.13 mycroft #endif
1943 1.26 thorpej case IPCTL_FORWSRCRT:
1944 1.47 cjs /* Don't allow this to change in a secure environment. */
1945 1.26 thorpej if (securelevel > 0)
1946 1.46 cjs return (sysctl_rdint(oldp, oldlenp, newp,
1947 1.46 cjs ip_forwsrcrt));
1948 1.46 cjs else
1949 1.46 cjs return (sysctl_int(oldp, oldlenp, newp, newlen,
1950 1.46 cjs &ip_forwsrcrt));
1951 1.27 thorpej case IPCTL_DIRECTEDBCAST:
1952 1.27 thorpej return (sysctl_int(oldp, oldlenp, newp, newlen,
1953 1.27 thorpej &ip_directedbcast));
1954 1.47 cjs case IPCTL_ALLOWSRCRT:
1955 1.47 cjs return (sysctl_int(oldp, oldlenp, newp, newlen,
1956 1.47 cjs &ip_allowsrcrt));
1957 1.52 thorpej case IPCTL_SUBNETSARELOCAL:
1958 1.52 thorpej return (sysctl_int(oldp, oldlenp, newp, newlen,
1959 1.52 thorpej &subnetsarelocal));
1960 1.53 kml case IPCTL_MTUDISC:
1961 1.60 kml error = sysctl_int(oldp, oldlenp, newp, newlen,
1962 1.60 kml &ip_mtudisc);
1963 1.163 itojun if (error == 0 && ip_mtudisc == 0)
1964 1.163 itojun rt_timer_queue_remove_all(ip_mtudisc_timeout_q, TRUE);
1965 1.60 kml return error;
1966 1.54 lukem case IPCTL_ANONPORTMIN:
1967 1.54 lukem old = anonportmin;
1968 1.54 lukem error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmin);
1969 1.118 itojun if (anonportmin >= anonportmax || anonportmin < 0
1970 1.118 itojun || anonportmin > 65535
1971 1.54 lukem #ifndef IPNOPRIVPORTS
1972 1.54 lukem || anonportmin < IPPORT_RESERVED
1973 1.54 lukem #endif
1974 1.54 lukem ) {
1975 1.54 lukem anonportmin = old;
1976 1.54 lukem return (EINVAL);
1977 1.54 lukem }
1978 1.54 lukem return (error);
1979 1.54 lukem case IPCTL_ANONPORTMAX:
1980 1.54 lukem old = anonportmax;
1981 1.54 lukem error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmax);
1982 1.118 itojun if (anonportmin >= anonportmax || anonportmax < 0
1983 1.118 itojun || anonportmax > 65535
1984 1.54 lukem #ifndef IPNOPRIVPORTS
1985 1.54 lukem || anonportmax < IPPORT_RESERVED
1986 1.54 lukem #endif
1987 1.54 lukem ) {
1988 1.54 lukem anonportmax = old;
1989 1.54 lukem return (EINVAL);
1990 1.54 lukem }
1991 1.60 kml return (error);
1992 1.60 kml case IPCTL_MTUDISCTIMEOUT:
1993 1.158 itojun old = ip_mtudisc_timeout;
1994 1.60 kml error = sysctl_int(oldp, oldlenp, newp, newlen,
1995 1.60 kml &ip_mtudisc_timeout);
1996 1.158 itojun if (ip_mtudisc_timeout < 0) {
1997 1.158 itojun ip_mtudisc_timeout = old;
1998 1.156 itojun return (EINVAL);
1999 1.158 itojun }
2000 1.163 itojun if (error == 0)
2001 1.162 itojun rt_timer_queue_change(ip_mtudisc_timeout_q,
2002 1.162 itojun ip_mtudisc_timeout);
2003 1.54 lukem return (error);
2004 1.65 matt #ifdef GATEWAY
2005 1.65 matt case IPCTL_MAXFLOWS:
2006 1.67 thorpej {
2007 1.67 thorpej int s;
2008 1.67 thorpej
2009 1.65 matt error = sysctl_int(oldp, oldlenp, newp, newlen,
2010 1.65 matt &ip_maxflows);
2011 1.67 thorpej s = splsoftnet();
2012 1.65 matt ipflow_reap(0);
2013 1.67 thorpej splx(s);
2014 1.65 matt return (error);
2015 1.67 thorpej }
2016 1.89 itojun #endif
2017 1.90 itojun case IPCTL_HOSTZEROBROADCAST:
2018 1.90 itojun return (sysctl_int(oldp, oldlenp, newp, newlen,
2019 1.90 itojun &hostzeroisbroadcast));
2020 1.89 itojun #if NGIF > 0
2021 1.89 itojun case IPCTL_GIF_TTL:
2022 1.157 itojun return (sysctl_int(oldp, oldlenp, newp, newlen,
2023 1.90 itojun &ip_gif_ttl));
2024 1.144 martin #endif
2025 1.144 martin
2026 1.144 martin #if NGRE > 0
2027 1.144 martin case IPCTL_GRE_TTL:
2028 1.157 itojun return (sysctl_int(oldp, oldlenp, newp, newlen,
2029 1.144 martin &ip_gre_ttl));
2030 1.117 tron #endif
2031 1.117 tron
2032 1.117 tron #ifndef IPNOPRIVPORTS
2033 1.117 tron case IPCTL_LOWPORTMIN:
2034 1.117 tron old = lowportmin;
2035 1.117 tron error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmin);
2036 1.117 tron if (lowportmin >= lowportmax
2037 1.117 tron || lowportmin > IPPORT_RESERVEDMAX
2038 1.117 tron || lowportmin < IPPORT_RESERVEDMIN
2039 1.117 tron ) {
2040 1.117 tron lowportmin = old;
2041 1.117 tron return (EINVAL);
2042 1.117 tron }
2043 1.117 tron return (error);
2044 1.117 tron case IPCTL_LOWPORTMAX:
2045 1.117 tron old = lowportmax;
2046 1.117 tron error = sysctl_int(oldp, oldlenp, newp, newlen, &lowportmax);
2047 1.117 tron if (lowportmin >= lowportmax
2048 1.117 tron || lowportmax > IPPORT_RESERVEDMAX
2049 1.117 tron || lowportmax < IPPORT_RESERVEDMIN
2050 1.117 tron ) {
2051 1.117 tron lowportmax = old;
2052 1.117 tron return (EINVAL);
2053 1.117 tron }
2054 1.117 tron return (error);
2055 1.65 matt #endif
2056 1.131 itojun
2057 1.131 itojun case IPCTL_MAXFRAGPACKETS:
2058 1.131 itojun return (sysctl_int(oldp, oldlenp, newp, newlen,
2059 1.131 itojun &ip_maxfragpackets));
2060 1.88 sommerfe
2061 1.165 christos case IPCTL_CHECKINTERFACE:
2062 1.165 christos return (sysctl_int(oldp, oldlenp, newp, newlen,
2063 1.165 christos &ip_checkinterface));
2064 1.182 itojun
2065 1.180 jonathan case IPCTL_IFQ:
2066 1.182 itojun return (sysctl_ifq(name + 1, namelen - 1, oldp, oldlenp,
2067 1.182 itojun newp, newlen, &ipintrq));
2068 1.180 jonathan
2069 1.13 mycroft default:
2070 1.13 mycroft return (EOPNOTSUPP);
2071 1.13 mycroft }
2072 1.13 mycroft /* NOTREACHED */
2073 1.1 cgd }
2074