ip_input.c revision 1.115 1 1.115 mrg /* $NetBSD: ip_input.c,v 1.115 2000/06/28 03:01:16 mrg Exp $ */
2 1.89 itojun
3 1.89 itojun /*
4 1.89 itojun * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 1.89 itojun * All rights reserved.
6 1.89 itojun *
7 1.89 itojun * Redistribution and use in source and binary forms, with or without
8 1.89 itojun * modification, are permitted provided that the following conditions
9 1.89 itojun * are met:
10 1.89 itojun * 1. Redistributions of source code must retain the above copyright
11 1.89 itojun * notice, this list of conditions and the following disclaimer.
12 1.89 itojun * 2. Redistributions in binary form must reproduce the above copyright
13 1.89 itojun * notice, this list of conditions and the following disclaimer in the
14 1.89 itojun * documentation and/or other materials provided with the distribution.
15 1.89 itojun * 3. Neither the name of the project nor the names of its contributors
16 1.89 itojun * may be used to endorse or promote products derived from this software
17 1.89 itojun * without specific prior written permission.
18 1.89 itojun *
19 1.89 itojun * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 1.89 itojun * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 1.89 itojun * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 1.89 itojun * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 1.89 itojun * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 1.89 itojun * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 1.89 itojun * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 1.89 itojun * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 1.89 itojun * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 1.89 itojun * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 1.89 itojun * SUCH DAMAGE.
30 1.89 itojun */
31 1.76 thorpej
32 1.76 thorpej /*-
33 1.76 thorpej * Copyright (c) 1998 The NetBSD Foundation, Inc.
34 1.76 thorpej * All rights reserved.
35 1.76 thorpej *
36 1.76 thorpej * This code is derived from software contributed to The NetBSD Foundation
37 1.76 thorpej * by Public Access Networks Corporation ("Panix"). It was developed under
38 1.76 thorpej * contract to Panix by Eric Haszlakiewicz and Thor Lancelot Simon.
39 1.76 thorpej *
40 1.76 thorpej * Redistribution and use in source and binary forms, with or without
41 1.76 thorpej * modification, are permitted provided that the following conditions
42 1.76 thorpej * are met:
43 1.76 thorpej * 1. Redistributions of source code must retain the above copyright
44 1.76 thorpej * notice, this list of conditions and the following disclaimer.
45 1.76 thorpej * 2. Redistributions in binary form must reproduce the above copyright
46 1.76 thorpej * notice, this list of conditions and the following disclaimer in the
47 1.76 thorpej * documentation and/or other materials provided with the distribution.
48 1.76 thorpej * 3. All advertising materials mentioning features or use of this software
49 1.76 thorpej * must display the following acknowledgement:
50 1.76 thorpej * This product includes software developed by the NetBSD
51 1.76 thorpej * Foundation, Inc. and its contributors.
52 1.76 thorpej * 4. Neither the name of The NetBSD Foundation nor the names of its
53 1.76 thorpej * contributors may be used to endorse or promote products derived
54 1.76 thorpej * from this software without specific prior written permission.
55 1.76 thorpej *
56 1.76 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
57 1.76 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
58 1.76 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
59 1.76 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
60 1.76 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
61 1.76 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
62 1.76 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
63 1.76 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
64 1.76 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
65 1.76 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
66 1.76 thorpej * POSSIBILITY OF SUCH DAMAGE.
67 1.76 thorpej */
68 1.14 cgd
69 1.1 cgd /*
70 1.13 mycroft * Copyright (c) 1982, 1986, 1988, 1993
71 1.13 mycroft * The Regents of the University of California. All rights reserved.
72 1.1 cgd *
73 1.1 cgd * Redistribution and use in source and binary forms, with or without
74 1.1 cgd * modification, are permitted provided that the following conditions
75 1.1 cgd * are met:
76 1.1 cgd * 1. Redistributions of source code must retain the above copyright
77 1.1 cgd * notice, this list of conditions and the following disclaimer.
78 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
79 1.1 cgd * notice, this list of conditions and the following disclaimer in the
80 1.1 cgd * documentation and/or other materials provided with the distribution.
81 1.1 cgd * 3. All advertising materials mentioning features or use of this software
82 1.1 cgd * must display the following acknowledgement:
83 1.1 cgd * This product includes software developed by the University of
84 1.1 cgd * California, Berkeley and its contributors.
85 1.1 cgd * 4. Neither the name of the University nor the names of its contributors
86 1.1 cgd * may be used to endorse or promote products derived from this software
87 1.1 cgd * without specific prior written permission.
88 1.1 cgd *
89 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
90 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
91 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
92 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
93 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
94 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
95 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
96 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
97 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
98 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
99 1.1 cgd * SUCH DAMAGE.
100 1.1 cgd *
101 1.14 cgd * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
102 1.1 cgd */
103 1.55 scottr
104 1.62 matt #include "opt_gateway.h"
105 1.69 mrg #include "opt_pfil_hooks.h"
106 1.91 thorpej #include "opt_ipsec.h"
107 1.55 scottr #include "opt_mrouting.h"
108 1.1 cgd
109 1.5 mycroft #include <sys/param.h>
110 1.5 mycroft #include <sys/systm.h>
111 1.5 mycroft #include <sys/malloc.h>
112 1.5 mycroft #include <sys/mbuf.h>
113 1.5 mycroft #include <sys/domain.h>
114 1.5 mycroft #include <sys/protosw.h>
115 1.5 mycroft #include <sys/socket.h>
116 1.44 thorpej #include <sys/socketvar.h>
117 1.5 mycroft #include <sys/errno.h>
118 1.5 mycroft #include <sys/time.h>
119 1.5 mycroft #include <sys/kernel.h>
120 1.28 christos #include <sys/proc.h>
121 1.72 thorpej #include <sys/pool.h>
122 1.28 christos
123 1.115 mrg #include <uvm/uvm_extern.h>
124 1.115 mrg
125 1.28 christos #include <sys/sysctl.h>
126 1.1 cgd
127 1.5 mycroft #include <net/if.h>
128 1.44 thorpej #include <net/if_dl.h>
129 1.5 mycroft #include <net/route.h>
130 1.45 mrg #include <net/pfil.h>
131 1.1 cgd
132 1.5 mycroft #include <netinet/in.h>
133 1.5 mycroft #include <netinet/in_systm.h>
134 1.5 mycroft #include <netinet/ip.h>
135 1.5 mycroft #include <netinet/in_pcb.h>
136 1.5 mycroft #include <netinet/in_var.h>
137 1.5 mycroft #include <netinet/ip_var.h>
138 1.5 mycroft #include <netinet/ip_icmp.h>
139 1.89 itojun /* just for gif_ttl */
140 1.89 itojun #include <netinet/in_gif.h>
141 1.89 itojun #include "gif.h"
142 1.111 jdolecek
143 1.111 jdolecek #ifdef MROUTING
144 1.111 jdolecek #include <netinet/ip_mroute.h>
145 1.111 jdolecek #endif
146 1.89 itojun
147 1.89 itojun #ifdef IPSEC
148 1.89 itojun #include <netinet6/ipsec.h>
149 1.89 itojun #include <netkey/key.h>
150 1.89 itojun #include <netkey/key_debug.h>
151 1.89 itojun #endif
152 1.44 thorpej
/*
 * Compile-time defaults for the run-time tunables defined below.
 * Each default applies only when the macro has not already been defined.
 */
#if !defined(IPFORWARDING)
#if defined(GATEWAY)
#define	IPFORWARDING	1	/* forward IP packets not for us */
#else /* !GATEWAY */
#define	IPFORWARDING	0	/* don't forward IP packets not for us */
#endif /* GATEWAY */
#endif /* !IPFORWARDING */

#if !defined(IPSENDREDIRECTS)
#define	IPSENDREDIRECTS	1
#endif

#if !defined(IPFORWSRCRT)
#define	IPFORWSRCRT	1	/* forward source-routed packets */
#endif

#if !defined(IPALLOWSRCRT)
#define	IPALLOWSRCRT	1	/* allow source-routed packets */
#endif

#if !defined(IPMTUDISC)
#define	IPMTUDISC	0
#endif

#if !defined(IPMTUDISCTIMEOUT)
#define	IPMTUDISCTIMEOUT (10 * 60)	/* as per RFC 1191 */
#endif

/*
 * Note: DIRECTED_BROADCAST is handled this way so that previous
 * configuration using this option will Just Work.
 */
#if !defined(IPDIRECTEDBCAST)
#if defined(DIRECTED_BROADCAST)
#define	IPDIRECTEDBCAST	1
#else /* !DIRECTED_BROADCAST */
#define	IPDIRECTEDBCAST	0
#endif /* DIRECTED_BROADCAST */
#endif /* !IPDIRECTEDBCAST */
/*
 * Run-time tunables, initialised from the compile-time defaults above.
 */
/* Nonzero: ip_input() hands packets not addressed to us to ip_forward(). */
int	ipforwarding = IPFORWARDING;
/* NOTE(review): consumer is outside this chunk; presumably ICMP redirects. */
int	ipsendredirects = IPSENDREDIRECTS;
/* Initialised from IPDEFTTL; consumers are outside this chunk. */
int	ip_defttl = IPDEFTTL;
/* Forward source-routed packets (per the IPFORWSRCRT default's comment). */
int	ip_forwsrcrt = IPFORWSRCRT;
/* Directed-broadcast setting; consumers are outside this chunk. */
int	ip_directedbcast = IPDIRECTEDBCAST;
/* Allow source-routed packets (per the IPALLOWSRCRT default's comment). */
int	ip_allowsrcrt = IPALLOWSRCRT;
/* Nonzero at ip_init() time: create the path-MTU-discovery timer queue. */
int	ip_mtudisc = IPMTUDISC;
/* Timeout handed to rt_timer_queue_create() for that queue. */
u_int	ip_mtudisc_timeout = IPMTUDISCTIMEOUT;
#ifdef DIAGNOSTIC
/* NOTE(review): presumably enables debug printfs; not used in this chunk. */
int	ipprintfs = 0;
#endif

/* Created by ip_init() only when ip_mtudisc is nonzero; NULL otherwise. */
struct rttimer_queue *ip_mtudisc_timeout_q = NULL;

extern	struct domain inetdomain;
/* Copied into ipintrq.ifq_maxlen by ip_init(). */
int	ipqmaxlen = IFQ_MAXLEN;
/* Tail queue of configured IPv4 addresses; empty until one is set. */
struct	in_ifaddrhead in_ifaddr;
/* Hash table of those addresses, allocated by ip_init() via hashinit(). */
struct	in_ifaddrhashhead *in_ifaddrhashtbl;
/* Input queue drained by ipintr(). */
struct	ifqueue ipintrq;
/* IP statistics counters. */
struct	ipstat ipstat;
/* Seeded by ip_init() from the low 16 bits of time.tv_sec. */
u_int16_t	ip_id;

/* List of fragment reassembly queues, searched by ip_input(). */
struct ipqhead ipq;
/* Nonzero while the reassembly list is locked; see ipq_lock_try(). */
int	ipq_locked;
211 1.75 thorpej
212 1.75 thorpej static __inline int ipq_lock_try __P((void));
213 1.75 thorpej static __inline void ipq_unlock __P((void));
214 1.75 thorpej
215 1.75 thorpej static __inline int
216 1.75 thorpej ipq_lock_try()
217 1.75 thorpej {
218 1.75 thorpej int s;
219 1.75 thorpej
220 1.75 thorpej s = splimp();
221 1.75 thorpej if (ipq_locked) {
222 1.75 thorpej splx(s);
223 1.75 thorpej return (0);
224 1.75 thorpej }
225 1.75 thorpej ipq_locked = 1;
226 1.75 thorpej splx(s);
227 1.75 thorpej return (1);
228 1.75 thorpej }
229 1.75 thorpej
230 1.75 thorpej static __inline void
231 1.75 thorpej ipq_unlock()
232 1.75 thorpej {
233 1.75 thorpej int s;
234 1.75 thorpej
235 1.75 thorpej s = splimp();
236 1.75 thorpej ipq_locked = 0;
237 1.75 thorpej splx(s);
238 1.75 thorpej }
239 1.75 thorpej
/*
 * Locking macros for the reassembly queue.
 *
 * With DIAGNOSTIC, IPQ_LOCK() panics if the lock is already held and
 * IPQ_LOCK_CHECK() panics if it is not held.  Without DIAGNOSTIC,
 * IPQ_LOCK() discards the result of ipq_lock_try() and
 * IPQ_LOCK_CHECK() expands to nothing.
 */
#if defined(DIAGNOSTIC)
#define	IPQ_LOCK()							\
do {									\
	if (!ipq_lock_try()) {						\
		printf("%s:%d: ipq already locked\n", __FILE__, __LINE__); \
		panic("ipq_lock");					\
	}								\
} while (0)
#define	IPQ_LOCK_CHECK()						\
do {									\
	if (!ipq_locked) {						\
		printf("%s:%d: ipq lock not held\n", __FILE__, __LINE__); \
		panic("ipq lock check");				\
	}								\
} while (0)
#else /* !DIAGNOSTIC */
#define	IPQ_LOCK()		(void) ipq_lock_try()
#define	IPQ_LOCK_CHECK()	/* nothing */
#endif /* DIAGNOSTIC */

#define	IPQ_UNLOCK()		ipq_unlock()
261 1.1 cgd
/*
 * Pool of struct ipqent fragment entries: initialised by ip_init(),
 * allocated from by ip_input() for each fragment to be reassembled.
 */
struct pool ipqent_pool;

/*
 * We need to save the IP options in case a protocol wants to respond
 * to an incoming packet over the same route if the packet got here
 * using IP source routing.  This allows connection establishment and
 * maintenance when the remote end is on a network that is not known
 * to us.
 */
/* Reset to 0 by ip_input() before option processing of each packet. */
int	ip_nhops = 0;
static	struct ip_srcrt {
	struct	in_addr dst;			/* final destination */
	char	nop;				/* one NOP to align */
	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
} ip_srcrt;

/* Defined outside this chunk; NOTE(review): presumably fills ip_srcrt. */
static void save_rte __P((u_char *, struct in_addr));
280 1.35 mycroft
281 1.1 cgd /*
282 1.1 cgd * IP initialization: fill in IP protocol switch table.
283 1.1 cgd * All protocols not implemented in kernel go to raw IP protocol handler.
284 1.1 cgd */
285 1.8 mycroft void
286 1.1 cgd ip_init()
287 1.1 cgd {
288 1.109 augustss struct protosw *pr;
289 1.109 augustss int i;
290 1.1 cgd
291 1.72 thorpej pool_init(&ipqent_pool, sizeof(struct ipqent), 0, 0, 0, "ipqepl",
292 1.72 thorpej 0, NULL, NULL, M_IPQ);
293 1.72 thorpej
294 1.1 cgd pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
295 1.1 cgd if (pr == 0)
296 1.1 cgd panic("ip_init");
297 1.1 cgd for (i = 0; i < IPPROTO_MAX; i++)
298 1.1 cgd ip_protox[i] = pr - inetsw;
299 1.1 cgd for (pr = inetdomain.dom_protosw;
300 1.1 cgd pr < inetdomain.dom_protoswNPROTOSW; pr++)
301 1.1 cgd if (pr->pr_domain->dom_family == PF_INET &&
302 1.1 cgd pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
303 1.1 cgd ip_protox[pr->pr_protocol] = pr - inetsw;
304 1.25 cgd LIST_INIT(&ipq);
305 1.1 cgd ip_id = time.tv_sec & 0xffff;
306 1.1 cgd ipintrq.ifq_maxlen = ipqmaxlen;
307 1.22 mycroft TAILQ_INIT(&in_ifaddr);
308 1.57 tls in_ifaddrhashtbl =
309 1.57 tls hashinit(IN_IFADDR_HASH_SIZE, M_IFADDR, M_WAITOK, &in_ifaddrhash);
310 1.60 kml if (ip_mtudisc != 0)
311 1.60 kml ip_mtudisc_timeout_q =
312 1.60 kml rt_timer_queue_create(ip_mtudisc_timeout);
313 1.73 thorpej #ifdef GATEWAY
314 1.73 thorpej ipflow_init();
315 1.73 thorpej #endif
316 1.1 cgd }
317 1.1 cgd
/* IPv4 sockaddr with only the length and family members initialised. */
struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* NOTE(review): not used in this chunk; presumably ip_forward()'s route. */
struct	route ipforward_rt;
320 1.1 cgd
321 1.1 cgd /*
322 1.89 itojun * IP software interrupt routine
323 1.89 itojun */
324 1.89 itojun void
325 1.89 itojun ipintr()
326 1.89 itojun {
327 1.89 itojun int s;
328 1.89 itojun struct mbuf *m;
329 1.89 itojun
330 1.89 itojun while (1) {
331 1.89 itojun s = splimp();
332 1.89 itojun IF_DEQUEUE(&ipintrq, m);
333 1.89 itojun splx(s);
334 1.89 itojun if (m == 0)
335 1.89 itojun return;
336 1.89 itojun ip_input(m);
337 1.89 itojun }
338 1.89 itojun }
339 1.89 itojun
340 1.89 itojun /*
341 1.1 cgd * Ip input routine. Checksum and byte swap header. If fragmented
342 1.1 cgd * try to reassemble. Process options. Pass to next level.
343 1.1 cgd */
344 1.8 mycroft void
345 1.89 itojun ip_input(struct mbuf *m)
346 1.1 cgd {
347 1.109 augustss struct ip *ip = NULL;
348 1.109 augustss struct ipq *fp;
349 1.109 augustss struct in_ifaddr *ia;
350 1.109 augustss struct ifaddr *ifa;
351 1.25 cgd struct ipqent *ipqe;
352 1.89 itojun int hlen = 0, mff, len;
353 1.100 itojun int downmatch;
354 1.36 mrg #ifdef PFIL_HOOKS
355 1.33 mrg struct packet_filter_hook *pfh;
356 1.33 mrg struct mbuf *m0;
357 1.43 mrg int rv;
358 1.36 mrg #endif /* PFIL_HOOKS */
359 1.1 cgd
360 1.1 cgd #ifdef DIAGNOSTIC
361 1.1 cgd if ((m->m_flags & M_PKTHDR) == 0)
362 1.1 cgd panic("ipintr no HDR");
363 1.1 cgd #endif
364 1.89 itojun #ifdef IPSEC
365 1.89 itojun /*
366 1.89 itojun * should the inner packet be considered authentic?
367 1.89 itojun * see comment in ah4_input().
368 1.89 itojun */
369 1.89 itojun if (m) {
370 1.89 itojun m->m_flags &= ~M_AUTHIPHDR;
371 1.89 itojun m->m_flags &= ~M_AUTHIPDGM;
372 1.89 itojun }
373 1.89 itojun #endif
374 1.1 cgd /*
375 1.1 cgd * If no IP addresses have been set yet but the interfaces
376 1.1 cgd * are receiving, can't do anything with incoming packets yet.
377 1.1 cgd */
378 1.22 mycroft if (in_ifaddr.tqh_first == 0)
379 1.1 cgd goto bad;
380 1.1 cgd ipstat.ips_total++;
381 1.1 cgd if (m->m_len < sizeof (struct ip) &&
382 1.1 cgd (m = m_pullup(m, sizeof (struct ip))) == 0) {
383 1.1 cgd ipstat.ips_toosmall++;
384 1.89 itojun return;
385 1.1 cgd }
386 1.1 cgd ip = mtod(m, struct ip *);
387 1.13 mycroft if (ip->ip_v != IPVERSION) {
388 1.13 mycroft ipstat.ips_badvers++;
389 1.13 mycroft goto bad;
390 1.13 mycroft }
391 1.1 cgd hlen = ip->ip_hl << 2;
392 1.1 cgd if (hlen < sizeof(struct ip)) { /* minimum header length */
393 1.1 cgd ipstat.ips_badhlen++;
394 1.1 cgd goto bad;
395 1.1 cgd }
396 1.1 cgd if (hlen > m->m_len) {
397 1.1 cgd if ((m = m_pullup(m, hlen)) == 0) {
398 1.1 cgd ipstat.ips_badhlen++;
399 1.89 itojun return;
400 1.1 cgd }
401 1.1 cgd ip = mtod(m, struct ip *);
402 1.1 cgd }
403 1.98 thorpej
404 1.85 hwr /*
405 1.99 thorpej * RFC1122: packets with a multicast source address are
406 1.98 thorpej * not allowed.
407 1.85 hwr */
408 1.85 hwr if (IN_MULTICAST(ip->ip_src.s_addr)) {
409 1.98 thorpej /* XXX stat */
410 1.85 hwr goto bad;
411 1.85 hwr }
412 1.85 hwr
413 1.78 mycroft if (in_cksum(m, hlen) != 0) {
414 1.1 cgd ipstat.ips_badsum++;
415 1.1 cgd goto bad;
416 1.1 cgd }
417 1.1 cgd
418 1.1 cgd /*
419 1.1 cgd * Convert fields to host representation.
420 1.1 cgd */
421 1.1 cgd NTOHS(ip->ip_len);
422 1.1 cgd NTOHS(ip->ip_off);
423 1.35 mycroft len = ip->ip_len;
424 1.81 proff
425 1.81 proff /*
426 1.81 proff * Check for additional length bogosity
427 1.81 proff */
428 1.84 proff if (len < hlen) {
429 1.81 proff ipstat.ips_badlen++;
430 1.81 proff goto bad;
431 1.81 proff }
432 1.1 cgd
433 1.1 cgd /*
434 1.1 cgd * Check that the amount of data in the buffers
435 1.1 cgd * is as at least much as the IP header would have us expect.
436 1.1 cgd * Trim mbufs if longer than we expect.
437 1.1 cgd * Drop packet if shorter than we expect.
438 1.1 cgd */
439 1.35 mycroft if (m->m_pkthdr.len < len) {
440 1.1 cgd ipstat.ips_tooshort++;
441 1.1 cgd goto bad;
442 1.1 cgd }
443 1.35 mycroft if (m->m_pkthdr.len > len) {
444 1.1 cgd if (m->m_len == m->m_pkthdr.len) {
445 1.35 mycroft m->m_len = len;
446 1.35 mycroft m->m_pkthdr.len = len;
447 1.1 cgd } else
448 1.35 mycroft m_adj(m, len - m->m_pkthdr.len);
449 1.1 cgd }
450 1.1 cgd
451 1.94 itojun #ifdef IPSEC
452 1.94 itojun /* ipflow (IP fast fowarding) is not compatible with IPsec. */
453 1.94 itojun m->m_flags &= ~M_CANFASTFWD;
454 1.94 itojun #else
455 1.64 thorpej /*
456 1.64 thorpej * Assume that we can create a fast-forward IP flow entry
457 1.64 thorpej * based on this packet.
458 1.64 thorpej */
459 1.64 thorpej m->m_flags |= M_CANFASTFWD;
460 1.94 itojun #endif
461 1.64 thorpej
462 1.36 mrg #ifdef PFIL_HOOKS
463 1.33 mrg /*
464 1.64 thorpej * Run through list of hooks for input packets. If there are any
465 1.64 thorpej * filters which require that additional packets in the flow are
466 1.64 thorpej * not fast-forwarded, they must clear the M_CANFASTFWD flag.
467 1.64 thorpej * Note that filters must _never_ set this flag, as another filter
468 1.64 thorpej * in the list may have previously cleared it.
469 1.33 mrg */
470 1.33 mrg m0 = m;
471 1.102 darrenr pfh = pfil_hook_get(PFIL_IN, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
472 1.101 darrenr for (; pfh; pfh = pfh->pfil_link.tqe_next)
473 1.33 mrg if (pfh->pfil_func) {
474 1.101 darrenr rv = pfh->pfil_func(ip, hlen,
475 1.101 darrenr m->m_pkthdr.rcvif, 0, &m0);
476 1.43 mrg if (rv)
477 1.89 itojun return;
478 1.68 sommerfe m = m0;
479 1.68 sommerfe if (m == NULL)
480 1.89 itojun return;
481 1.68 sommerfe ip = mtod(m, struct ip *);
482 1.33 mrg }
483 1.36 mrg #endif /* PFIL_HOOKS */
484 1.33 mrg
485 1.1 cgd /*
486 1.1 cgd * Process options and, if not destined for us,
487 1.1 cgd * ship it on. ip_dooptions returns 1 when an
488 1.1 cgd * error was detected (causing an icmp message
489 1.1 cgd * to be sent and the original packet to be freed).
490 1.1 cgd */
491 1.1 cgd ip_nhops = 0; /* for source routed packets */
492 1.1 cgd if (hlen > sizeof (struct ip) && ip_dooptions(m))
493 1.89 itojun return;
494 1.1 cgd
495 1.1 cgd /*
496 1.1 cgd * Check our list of addresses, to see if the packet is for us.
497 1.100 itojun *
498 1.100 itojun * Traditional 4.4BSD did not consult IFF_UP at all.
499 1.100 itojun * The behavior here is to treat addresses on !IFF_UP interface
500 1.100 itojun * as not mine.
501 1.1 cgd */
502 1.100 itojun downmatch = 0;
503 1.97 itojun for (ia = IN_IFADDR_HASH(ip->ip_dst.s_addr).lh_first;
504 1.97 itojun ia != NULL;
505 1.97 itojun ia = ia->ia_hash.le_next) {
506 1.97 itojun if (in_hosteq(ia->ia_addr.sin_addr, ip->ip_dst)) {
507 1.97 itojun if ((ia->ia_ifp->if_flags & IFF_UP) != 0)
508 1.97 itojun break;
509 1.100 itojun else
510 1.100 itojun downmatch++;
511 1.97 itojun }
512 1.97 itojun }
513 1.86 thorpej if (ia != NULL)
514 1.86 thorpej goto ours;
515 1.57 tls if (m->m_pkthdr.rcvif->if_flags & IFF_BROADCAST) {
516 1.57 tls for (ifa = m->m_pkthdr.rcvif->if_addrlist.tqh_first;
517 1.57 tls ifa != NULL; ifa = ifa->ifa_list.tqe_next) {
518 1.57 tls if (ifa->ifa_addr->sa_family != AF_INET) continue;
519 1.57 tls ia = ifatoia(ifa);
520 1.35 mycroft if (in_hosteq(ip->ip_dst, ia->ia_broadaddr.sin_addr) ||
521 1.35 mycroft in_hosteq(ip->ip_dst, ia->ia_netbroadcast) ||
522 1.20 mycroft /*
523 1.20 mycroft * Look for all-0's host part (old broadcast addr),
524 1.20 mycroft * either for subnet or net.
525 1.20 mycroft */
526 1.20 mycroft ip->ip_dst.s_addr == ia->ia_subnet ||
527 1.18 mycroft ip->ip_dst.s_addr == ia->ia_net)
528 1.1 cgd goto ours;
529 1.57 tls /*
530 1.57 tls * An interface with IP address zero accepts
531 1.57 tls * all packets that arrive on that interface.
532 1.57 tls */
533 1.57 tls if (in_nullhost(ia->ia_addr.sin_addr))
534 1.57 tls goto ours;
535 1.1 cgd }
536 1.1 cgd }
537 1.18 mycroft if (IN_MULTICAST(ip->ip_dst.s_addr)) {
538 1.4 hpeyerl struct in_multi *inm;
539 1.4 hpeyerl #ifdef MROUTING
540 1.4 hpeyerl extern struct socket *ip_mrouter;
541 1.10 brezak
542 1.10 brezak if (m->m_flags & M_EXT) {
543 1.10 brezak if ((m = m_pullup(m, hlen)) == 0) {
544 1.10 brezak ipstat.ips_toosmall++;
545 1.89 itojun return;
546 1.10 brezak }
547 1.10 brezak ip = mtod(m, struct ip *);
548 1.10 brezak }
549 1.4 hpeyerl
550 1.4 hpeyerl if (ip_mrouter) {
551 1.4 hpeyerl /*
552 1.4 hpeyerl * If we are acting as a multicast router, all
553 1.4 hpeyerl * incoming multicast packets are passed to the
554 1.4 hpeyerl * kernel-level multicast forwarding function.
555 1.4 hpeyerl * The packet is returned (relatively) intact; if
556 1.4 hpeyerl * ip_mforward() returns a non-zero value, the packet
557 1.4 hpeyerl * must be discarded, else it may be accepted below.
558 1.4 hpeyerl *
559 1.4 hpeyerl * (The IP ident field is put in the same byte order
560 1.4 hpeyerl * as expected when ip_mforward() is called from
561 1.4 hpeyerl * ip_output().)
562 1.4 hpeyerl */
563 1.13 mycroft if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
564 1.13 mycroft ipstat.ips_cantforward++;
565 1.4 hpeyerl m_freem(m);
566 1.89 itojun return;
567 1.4 hpeyerl }
568 1.4 hpeyerl
569 1.4 hpeyerl /*
570 1.4 hpeyerl * The process-level routing demon needs to receive
571 1.4 hpeyerl * all multicast IGMP packets, whether or not this
572 1.4 hpeyerl * host belongs to their destination groups.
573 1.4 hpeyerl */
574 1.4 hpeyerl if (ip->ip_p == IPPROTO_IGMP)
575 1.4 hpeyerl goto ours;
576 1.13 mycroft ipstat.ips_forward++;
577 1.4 hpeyerl }
578 1.4 hpeyerl #endif
579 1.4 hpeyerl /*
580 1.4 hpeyerl * See if we belong to the destination multicast group on the
581 1.4 hpeyerl * arrival interface.
582 1.4 hpeyerl */
583 1.4 hpeyerl IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
584 1.4 hpeyerl if (inm == NULL) {
585 1.13 mycroft ipstat.ips_cantforward++;
586 1.4 hpeyerl m_freem(m);
587 1.89 itojun return;
588 1.4 hpeyerl }
589 1.4 hpeyerl goto ours;
590 1.4 hpeyerl }
591 1.19 mycroft if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
592 1.35 mycroft in_nullhost(ip->ip_dst))
593 1.1 cgd goto ours;
594 1.1 cgd
595 1.1 cgd /*
596 1.1 cgd * Not for us; forward if possible and desirable.
597 1.1 cgd */
598 1.1 cgd if (ipforwarding == 0) {
599 1.1 cgd ipstat.ips_cantforward++;
600 1.1 cgd m_freem(m);
601 1.100 itojun } else {
602 1.100 itojun /*
603 1.100 itojun * If ip_dst matched any of my address on !IFF_UP interface,
604 1.100 itojun * and there's no IFF_UP interface that matches ip_dst,
605 1.100 itojun * send icmp unreach. Forwarding it will result in in-kernel
606 1.100 itojun * forwarding loop till TTL goes to 0.
607 1.100 itojun */
608 1.100 itojun if (downmatch) {
609 1.100 itojun icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
610 1.100 itojun ipstat.ips_cantforward++;
611 1.100 itojun return;
612 1.100 itojun }
613 1.1 cgd ip_forward(m, 0);
614 1.100 itojun }
615 1.89 itojun return;
616 1.1 cgd
617 1.1 cgd ours:
618 1.1 cgd /*
619 1.1 cgd * If offset or IP_MF are set, must reassemble.
620 1.1 cgd * Otherwise, nothing need be done.
621 1.1 cgd * (We could look in the reassembly queue to see
622 1.1 cgd * if the packet was previously fragmented,
623 1.1 cgd * but it's not worth the time; just let them time out.)
624 1.1 cgd */
625 1.37 perry if (ip->ip_off & ~(IP_DF|IP_RF)) {
626 1.1 cgd /*
627 1.1 cgd * Look for queue of fragments
628 1.1 cgd * of this datagram.
629 1.1 cgd */
630 1.75 thorpej IPQ_LOCK();
631 1.25 cgd for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next)
632 1.1 cgd if (ip->ip_id == fp->ipq_id &&
633 1.35 mycroft in_hosteq(ip->ip_src, fp->ipq_src) &&
634 1.35 mycroft in_hosteq(ip->ip_dst, fp->ipq_dst) &&
635 1.1 cgd ip->ip_p == fp->ipq_p)
636 1.1 cgd goto found;
637 1.1 cgd fp = 0;
638 1.1 cgd found:
639 1.1 cgd
640 1.1 cgd /*
641 1.1 cgd * Adjust ip_len to not reflect header,
642 1.25 cgd * set ipqe_mff if more fragments are expected,
643 1.1 cgd * convert offset of this to bytes.
644 1.1 cgd */
645 1.1 cgd ip->ip_len -= hlen;
646 1.25 cgd mff = (ip->ip_off & IP_MF) != 0;
647 1.25 cgd if (mff) {
648 1.16 cgd /*
649 1.16 cgd * Make sure that fragments have a data length
650 1.16 cgd * that's a non-zero multiple of 8 bytes.
651 1.16 cgd */
652 1.17 cgd if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
653 1.16 cgd ipstat.ips_badfrags++;
654 1.75 thorpej IPQ_UNLOCK();
655 1.16 cgd goto bad;
656 1.16 cgd }
657 1.16 cgd }
658 1.1 cgd ip->ip_off <<= 3;
659 1.1 cgd
660 1.1 cgd /*
661 1.1 cgd * If datagram marked as having more fragments
662 1.1 cgd * or if this is not the first fragment,
663 1.1 cgd * attempt reassembly; if it succeeds, proceed.
664 1.1 cgd */
665 1.25 cgd if (mff || ip->ip_off) {
666 1.1 cgd ipstat.ips_fragments++;
667 1.72 thorpej ipqe = pool_get(&ipqent_pool, PR_NOWAIT);
668 1.25 cgd if (ipqe == NULL) {
669 1.25 cgd ipstat.ips_rcvmemdrop++;
670 1.75 thorpej IPQ_UNLOCK();
671 1.25 cgd goto bad;
672 1.25 cgd }
673 1.25 cgd ipqe->ipqe_mff = mff;
674 1.50 thorpej ipqe->ipqe_m = m;
675 1.25 cgd ipqe->ipqe_ip = ip;
676 1.50 thorpej m = ip_reass(ipqe, fp);
677 1.75 thorpej if (m == 0) {
678 1.75 thorpej IPQ_UNLOCK();
679 1.89 itojun return;
680 1.75 thorpej }
681 1.13 mycroft ipstat.ips_reassembled++;
682 1.50 thorpej ip = mtod(m, struct ip *);
683 1.74 thorpej hlen = ip->ip_hl << 2;
684 1.79 mycroft ip->ip_len += hlen;
685 1.1 cgd } else
686 1.1 cgd if (fp)
687 1.1 cgd ip_freef(fp);
688 1.75 thorpej IPQ_UNLOCK();
689 1.79 mycroft }
690 1.1 cgd
691 1.1 cgd /*
692 1.1 cgd * Switch out to protocol's input routine.
693 1.1 cgd */
694 1.82 aidan #if IFA_STATS
695 1.82 aidan ia->ia_ifa.ifa_data.ifad_inbytes += ip->ip_len;
696 1.82 aidan #endif
697 1.1 cgd ipstat.ips_delivered++;
698 1.89 itojun {
699 1.89 itojun int off = hlen, nh = ip->ip_p;
700 1.89 itojun
701 1.89 itojun (*inetsw[ip_protox[nh]].pr_input)(m, off, nh);
702 1.89 itojun return;
703 1.89 itojun }
704 1.1 cgd bad:
705 1.1 cgd m_freem(m);
706 1.1 cgd }
707 1.1 cgd
708 1.1 cgd /*
709 1.1 cgd * Take incoming datagram fragment and try to
710 1.1 cgd * reassemble it into whole datagram. If a chain for
711 1.1 cgd * reassembly of this datagram already exists, then it
712 1.1 cgd * is given as fp; otherwise have to make a chain.
713 1.1 cgd */
714 1.50 thorpej struct mbuf *
715 1.25 cgd ip_reass(ipqe, fp)
716 1.109 augustss struct ipqent *ipqe;
717 1.109 augustss struct ipq *fp;
718 1.1 cgd {
719 1.109 augustss struct mbuf *m = ipqe->ipqe_m;
720 1.109 augustss struct ipqent *nq, *p, *q;
721 1.25 cgd struct ip *ip;
722 1.1 cgd struct mbuf *t;
723 1.25 cgd int hlen = ipqe->ipqe_ip->ip_hl << 2;
724 1.1 cgd int i, next;
725 1.1 cgd
726 1.75 thorpej IPQ_LOCK_CHECK();
727 1.75 thorpej
728 1.1 cgd /*
729 1.1 cgd * Presence of header sizes in mbufs
730 1.1 cgd * would confuse code below.
731 1.1 cgd */
732 1.1 cgd m->m_data += hlen;
733 1.1 cgd m->m_len -= hlen;
734 1.1 cgd
735 1.1 cgd /*
736 1.1 cgd * If first fragment to arrive, create a reassembly queue.
737 1.1 cgd */
738 1.1 cgd if (fp == 0) {
739 1.50 thorpej MALLOC(fp, struct ipq *, sizeof (struct ipq),
740 1.50 thorpej M_FTABLE, M_NOWAIT);
741 1.50 thorpej if (fp == NULL)
742 1.1 cgd goto dropfrag;
743 1.25 cgd LIST_INSERT_HEAD(&ipq, fp, ipq_q);
744 1.1 cgd fp->ipq_ttl = IPFRAGTTL;
745 1.25 cgd fp->ipq_p = ipqe->ipqe_ip->ip_p;
746 1.25 cgd fp->ipq_id = ipqe->ipqe_ip->ip_id;
747 1.25 cgd LIST_INIT(&fp->ipq_fragq);
748 1.25 cgd fp->ipq_src = ipqe->ipqe_ip->ip_src;
749 1.25 cgd fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
750 1.25 cgd p = NULL;
751 1.1 cgd goto insert;
752 1.1 cgd }
753 1.1 cgd
754 1.1 cgd /*
755 1.1 cgd * Find a segment which begins after this one does.
756 1.1 cgd */
757 1.25 cgd for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
758 1.25 cgd p = q, q = q->ipqe_q.le_next)
759 1.25 cgd if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off)
760 1.1 cgd break;
761 1.1 cgd
762 1.1 cgd /*
763 1.1 cgd * If there is a preceding segment, it may provide some of
764 1.1 cgd * our data already. If so, drop the data from the incoming
765 1.1 cgd * segment. If it provides all of our data, drop us.
766 1.1 cgd */
767 1.25 cgd if (p != NULL) {
768 1.25 cgd i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len -
769 1.25 cgd ipqe->ipqe_ip->ip_off;
770 1.1 cgd if (i > 0) {
771 1.25 cgd if (i >= ipqe->ipqe_ip->ip_len)
772 1.1 cgd goto dropfrag;
773 1.50 thorpej m_adj(ipqe->ipqe_m, i);
774 1.25 cgd ipqe->ipqe_ip->ip_off += i;
775 1.25 cgd ipqe->ipqe_ip->ip_len -= i;
776 1.1 cgd }
777 1.1 cgd }
778 1.1 cgd
779 1.1 cgd /*
780 1.1 cgd * While we overlap succeeding segments trim them or,
781 1.1 cgd * if they are completely covered, dequeue them.
782 1.1 cgd */
783 1.25 cgd for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len >
784 1.25 cgd q->ipqe_ip->ip_off; q = nq) {
785 1.25 cgd i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) -
786 1.25 cgd q->ipqe_ip->ip_off;
787 1.25 cgd if (i < q->ipqe_ip->ip_len) {
788 1.25 cgd q->ipqe_ip->ip_len -= i;
789 1.25 cgd q->ipqe_ip->ip_off += i;
790 1.50 thorpej m_adj(q->ipqe_m, i);
791 1.1 cgd break;
792 1.1 cgd }
793 1.25 cgd nq = q->ipqe_q.le_next;
794 1.50 thorpej m_freem(q->ipqe_m);
795 1.25 cgd LIST_REMOVE(q, ipqe_q);
796 1.72 thorpej pool_put(&ipqent_pool, q);
797 1.1 cgd }
798 1.1 cgd
799 1.1 cgd insert:
800 1.1 cgd /*
801 1.1 cgd * Stick new segment in its place;
802 1.1 cgd * check for complete reassembly.
803 1.1 cgd */
804 1.25 cgd if (p == NULL) {
805 1.25 cgd LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
806 1.25 cgd } else {
807 1.25 cgd LIST_INSERT_AFTER(p, ipqe, ipqe_q);
808 1.25 cgd }
809 1.1 cgd next = 0;
810 1.25 cgd for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
811 1.25 cgd p = q, q = q->ipqe_q.le_next) {
812 1.25 cgd if (q->ipqe_ip->ip_off != next)
813 1.1 cgd return (0);
814 1.25 cgd next += q->ipqe_ip->ip_len;
815 1.1 cgd }
816 1.25 cgd if (p->ipqe_mff)
817 1.1 cgd return (0);
818 1.1 cgd
819 1.1 cgd /*
820 1.41 thorpej * Reassembly is complete. Check for a bogus message size and
821 1.41 thorpej * concatenate fragments.
822 1.1 cgd */
823 1.25 cgd q = fp->ipq_fragq.lh_first;
824 1.25 cgd ip = q->ipqe_ip;
825 1.41 thorpej if ((next + (ip->ip_hl << 2)) > IP_MAXPACKET) {
826 1.41 thorpej ipstat.ips_toolong++;
827 1.41 thorpej ip_freef(fp);
828 1.41 thorpej return (0);
829 1.41 thorpej }
830 1.50 thorpej m = q->ipqe_m;
831 1.1 cgd t = m->m_next;
832 1.1 cgd m->m_next = 0;
833 1.1 cgd m_cat(m, t);
834 1.25 cgd nq = q->ipqe_q.le_next;
835 1.72 thorpej pool_put(&ipqent_pool, q);
836 1.25 cgd for (q = nq; q != NULL; q = nq) {
837 1.50 thorpej t = q->ipqe_m;
838 1.25 cgd nq = q->ipqe_q.le_next;
839 1.72 thorpej pool_put(&ipqent_pool, q);
840 1.1 cgd m_cat(m, t);
841 1.1 cgd }
842 1.1 cgd
843 1.1 cgd /*
844 1.1 cgd * Create header for new ip packet by
845 1.1 cgd * modifying header of first packet;
846 1.1 cgd * dequeue and discard fragment reassembly header.
847 1.1 cgd * Make header visible.
848 1.1 cgd */
849 1.1 cgd ip->ip_len = next;
850 1.25 cgd ip->ip_src = fp->ipq_src;
851 1.25 cgd ip->ip_dst = fp->ipq_dst;
852 1.25 cgd LIST_REMOVE(fp, ipq_q);
853 1.50 thorpej FREE(fp, M_FTABLE);
854 1.1 cgd m->m_len += (ip->ip_hl << 2);
855 1.1 cgd m->m_data -= (ip->ip_hl << 2);
856 1.1 cgd /* some debugging cruft by sklower, below, will go away soon */
857 1.1 cgd if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
858 1.109 augustss int plen = 0;
859 1.50 thorpej for (t = m; t; t = t->m_next)
860 1.50 thorpej plen += t->m_len;
861 1.50 thorpej m->m_pkthdr.len = plen;
862 1.1 cgd }
863 1.50 thorpej return (m);
864 1.1 cgd
865 1.1 cgd dropfrag:
866 1.1 cgd ipstat.ips_fragdropped++;
867 1.1 cgd m_freem(m);
868 1.72 thorpej pool_put(&ipqent_pool, ipqe);
869 1.1 cgd return (0);
870 1.1 cgd }
871 1.1 cgd
872 1.1 cgd /*
873 1.1 cgd * Free a fragment reassembly header and all
874 1.1 cgd * associated datagrams.
875 1.1 cgd */
876 1.8 mycroft void
877 1.1 cgd ip_freef(fp)
878 1.1 cgd struct ipq *fp;
879 1.1 cgd {
880 1.109 augustss struct ipqent *q, *p;
881 1.1 cgd
882 1.75 thorpej IPQ_LOCK_CHECK();
883 1.75 thorpej
884 1.25 cgd for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) {
885 1.25 cgd p = q->ipqe_q.le_next;
886 1.50 thorpej m_freem(q->ipqe_m);
887 1.25 cgd LIST_REMOVE(q, ipqe_q);
888 1.72 thorpej pool_put(&ipqent_pool, q);
889 1.1 cgd }
890 1.25 cgd LIST_REMOVE(fp, ipq_q);
891 1.50 thorpej FREE(fp, M_FTABLE);
892 1.1 cgd }
893 1.1 cgd
894 1.1 cgd /*
895 1.1 cgd * IP timer processing;
896 1.1 cgd * if a timer expires on a reassembly
897 1.1 cgd * queue, discard it.
898 1.1 cgd */
899 1.8 mycroft void
900 1.1 cgd ip_slowtimo()
901 1.1 cgd {
902 1.109 augustss struct ipq *fp, *nfp;
903 1.24 mycroft int s = splsoftnet();
904 1.1 cgd
905 1.75 thorpej IPQ_LOCK();
906 1.25 cgd for (fp = ipq.lh_first; fp != NULL; fp = nfp) {
907 1.25 cgd nfp = fp->ipq_q.le_next;
908 1.25 cgd if (--fp->ipq_ttl == 0) {
909 1.1 cgd ipstat.ips_fragtimeout++;
910 1.25 cgd ip_freef(fp);
911 1.1 cgd }
912 1.1 cgd }
913 1.75 thorpej IPQ_UNLOCK();
914 1.63 matt #ifdef GATEWAY
915 1.63 matt ipflow_slowtimo();
916 1.63 matt #endif
917 1.1 cgd splx(s);
918 1.1 cgd }
919 1.1 cgd
920 1.1 cgd /*
921 1.1 cgd * Drain off all datagram fragments.
922 1.1 cgd */
923 1.8 mycroft void
924 1.1 cgd ip_drain()
925 1.1 cgd {
926 1.1 cgd
927 1.75 thorpej /*
928 1.75 thorpej * We may be called from a device's interrupt context. If
929 1.75 thorpej * the ipq is already busy, just bail out now.
930 1.75 thorpej */
931 1.75 thorpej if (ipq_lock_try() == 0)
932 1.75 thorpej return;
933 1.75 thorpej
934 1.25 cgd while (ipq.lh_first != NULL) {
935 1.1 cgd ipstat.ips_fragdropped++;
936 1.25 cgd ip_freef(ipq.lh_first);
937 1.1 cgd }
938 1.75 thorpej
939 1.75 thorpej IPQ_UNLOCK();
940 1.1 cgd }
941 1.1 cgd
942 1.1 cgd /*
943 1.1 cgd * Do option processing on a datagram,
944 1.1 cgd * possibly discarding it if bad options are encountered,
945 1.1 cgd * or forwarding it if source-routed.
946 1.1 cgd * Returns 1 if packet has been forwarded/freed,
947 1.1 cgd * 0 if the packet should be processed further.
 *
 * The option bytes that follow the fixed header are walked one option
 * at a time.  Any malformed option jumps to the "bad" label, which
 * hands the mbuf to icmp_error() and bumps ipstat.ips_badoptions.
 * The ICMP type defaults to ICMP_PARAMPROB, in which case "code" is
 * the byte offset of the offending field from the start of the IP
 * header; the source-route and record-route failures override both
 * type and code with an ICMP_UNREACH variant.
948 1.1 cgd */
949 1.8 mycroft int
950 1.1 cgd ip_dooptions(m)
951 1.1 cgd struct mbuf *m;
952 1.1 cgd {
953 1.109 augustss struct ip *ip = mtod(m, struct ip *);
954 1.109 augustss u_char *cp, *cp0;
955 1.109 augustss struct ip_timestamp *ipt;
956 1.109 augustss struct in_ifaddr *ia;
957 1.1 cgd int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
958 1.104 thorpej struct in_addr dst;
959 1.1 cgd n_time ntime;
960 1.1 cgd
/* Keep the destination as received; the source-route case may rewrite ip->ip_dst. */
961 1.13 mycroft dst = ip->ip_dst;
/* cp walks the option area; cnt is the number of option bytes left. */
962 1.1 cgd cp = (u_char *)(ip + 1);
963 1.1 cgd cnt = (ip->ip_hl << 2) - sizeof (struct ip);
964 1.1 cgd for (; cnt > 0; cnt -= optlen, cp += optlen) {
965 1.1 cgd opt = cp[IPOPT_OPTVAL];
966 1.1 cgd if (opt == IPOPT_EOL)
967 1.1 cgd break;
/* A NOP is a lone byte with no length field. */
968 1.1 cgd if (opt == IPOPT_NOP)
969 1.1 cgd optlen = 1;
970 1.1 cgd else {
/* The length byte itself must lie inside the remaining option area. */
971 1.113 itojun if (cnt < IPOPT_OLEN + sizeof(*cp)) {
972 1.113 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
973 1.113 itojun goto bad;
974 1.113 itojun }
975 1.1 cgd optlen = cp[IPOPT_OLEN];
/* The stated length must cover type+length and must not run past the header. */
976 1.114 itojun if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt) {
977 1.1 cgd code = &cp[IPOPT_OLEN] - (u_char *)ip;
978 1.1 cgd goto bad;
979 1.1 cgd }
980 1.1 cgd }
981 1.1 cgd switch (opt) {
982 1.1 cgd
/* Options not handled below are skipped over unchanged. */
983 1.1 cgd default:
984 1.1 cgd break;
985 1.1 cgd
986 1.1 cgd /*
987 1.1 cgd * Source routing with record.
988 1.1 cgd * Find interface with current destination address.
989 1.1 cgd * If none on this machine then drop if strictly routed,
990 1.1 cgd * or do nothing if loosely routed.
991 1.1 cgd * Record interface address and bring up next address
992 1.1 cgd * component. If strictly routed make sure next
993 1.1 cgd * address is on directly accessible net.
994 1.1 cgd */
995 1.1 cgd case IPOPT_LSRR:
996 1.1 cgd case IPOPT_SSRR:
/* ip_allowsrcrt is zero: reject the packet with a "net prohibited" unreachable. */
997 1.47 cjs if (ip_allowsrcrt == 0) {
998 1.47 cjs type = ICMP_UNREACH;
999 1.47 cjs code = ICMP_UNREACH_NET_PROHIB;
1000 1.47 cjs goto bad;
1001 1.47 cjs }
/* The option must be long enough to contain its pointer byte. */
1002 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1003 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1004 1.114 itojun goto bad;
1005 1.114 itojun }
1006 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1007 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1008 1.1 cgd goto bad;
1009 1.1 cgd }
/* Look for a local interface address equal to the current destination. */
1010 1.1 cgd ipaddr.sin_addr = ip->ip_dst;
1011 1.19 mycroft ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1012 1.1 cgd if (ia == 0) {
1013 1.1 cgd if (opt == IPOPT_SSRR) {
1014 1.1 cgd type = ICMP_UNREACH;
1015 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1016 1.1 cgd goto bad;
1017 1.1 cgd }
1018 1.1 cgd /*
1019 1.1 cgd * Loose routing, and not at next destination
1020 1.1 cgd * yet; nothing to do except forward.
1021 1.1 cgd */
1022 1.1 cgd break;
1023 1.1 cgd }
1024 1.1 cgd off--; /* 0 origin */
1025 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen) {
1026 1.1 cgd /*
1027 1.1 cgd * End of source route. Should be for us.
1028 1.1 cgd */
1029 1.1 cgd save_rte(cp, ip->ip_src);
1030 1.1 cgd break;
1031 1.1 cgd }
1032 1.1 cgd /*
1033 1.1 cgd * locate outgoing interface
1034 1.1 cgd */
1035 1.1 cgd bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
1036 1.1 cgd sizeof(ipaddr.sin_addr));
/* Strict: next hop is looked up with ifa_ifwithaddr(); loose: via ip_rtaddr(). */
1037 1.96 thorpej if (opt == IPOPT_SSRR)
1038 1.96 thorpej ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
1039 1.96 thorpej else
1040 1.1 cgd ia = ip_rtaddr(ipaddr.sin_addr);
1041 1.1 cgd if (ia == 0) {
1042 1.1 cgd type = ICMP_UNREACH;
1043 1.1 cgd code = ICMP_UNREACH_SRCFAIL;
1044 1.1 cgd goto bad;
1045 1.1 cgd }
/*
 * Make the next hop the new destination, overwrite its slot in the
 * option with the address found above, and advance the pointer.
 */
1046 1.1 cgd ip->ip_dst = ipaddr.sin_addr;
1047 1.20 mycroft bcopy((caddr_t)&ia->ia_addr.sin_addr,
1048 1.1 cgd (caddr_t)(cp + off), sizeof(struct in_addr));
1049 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1050 1.13 mycroft /*
1051 1.13 mycroft * Let ip_intr's mcast routing check handle mcast pkts
1052 1.13 mycroft */
1053 1.18 mycroft forward = !IN_MULTICAST(ip->ip_dst.s_addr);
1054 1.1 cgd break;
1055 1.1 cgd
1056 1.1 cgd case IPOPT_RR:
/* Same header sanity checks as for the source-route options. */
1057 1.114 itojun if (optlen < IPOPT_OFFSET + sizeof(*cp)) {
1058 1.114 itojun code = &cp[IPOPT_OLEN] - (u_char *)ip;
1059 1.114 itojun goto bad;
1060 1.114 itojun }
1061 1.1 cgd if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
1062 1.1 cgd code = &cp[IPOPT_OFFSET] - (u_char *)ip;
1063 1.1 cgd goto bad;
1064 1.1 cgd }
1065 1.1 cgd /*
1066 1.1 cgd * If no space remains, ignore.
1067 1.1 cgd */
1068 1.1 cgd off--; /* 0 origin */
1069 1.112 sommerfe if ((off + sizeof(struct in_addr)) > optlen)
1070 1.1 cgd break;
1071 1.1 cgd bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
1072 1.1 cgd sizeof(ipaddr.sin_addr));
1073 1.1 cgd /*
1074 1.1 cgd * locate outgoing interface; if we're the destination,
1075 1.1 cgd * use the incoming interface (should be same).
1076 1.1 cgd */
1077 1.96 thorpej if ((ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr))))
1078 1.96 thorpej == NULL &&
1079 1.96 thorpej (ia = ip_rtaddr(ipaddr.sin_addr)) == NULL) {
1080 1.1 cgd type = ICMP_UNREACH;
1081 1.1 cgd code = ICMP_UNREACH_HOST;
1082 1.1 cgd goto bad;
1083 1.1 cgd }
/* Record the chosen interface address in the next free slot. */
1084 1.20 mycroft bcopy((caddr_t)&ia->ia_addr.sin_addr,
1085 1.1 cgd (caddr_t)(cp + off), sizeof(struct in_addr));
1086 1.1 cgd cp[IPOPT_OFFSET] += sizeof(struct in_addr);
1087 1.1 cgd break;
1088 1.1 cgd
1089 1.1 cgd case IPOPT_TS:
1090 1.1 cgd code = cp - (u_char *)ip;
1091 1.1 cgd ipt = (struct ip_timestamp *)cp;
1092 1.114 itojun if (ipt->ipt_len < 4 || ipt->ipt_len > 40) {
1093 1.114 itojun code = (u_char *)&ipt->ipt_len - (u_char *)ip;
1094 1.1 cgd goto bad;
1095 1.114 itojun }
1096 1.114 itojun if (ipt->ipt_ptr < 5) {
1097 1.114 itojun code = (u_char *)&ipt->ipt_ptr - (u_char *)ip;
1098 1.114 itojun goto bad;
1099 1.114 itojun }
/*
 * No room left for another 32-bit entry: count the miss in the
 * overflow field, and treat the packet as bad only when that counter
 * wraps back to zero.
 */
1100 1.15 cgd if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
1101 1.114 itojun if (++ipt->ipt_oflw == 0) {
1102 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1103 1.114 itojun (u_char *)ip;
1104 1.1 cgd goto bad;
1105 1.114 itojun }
1106 1.1 cgd break;
1107 1.1 cgd }
/* cp0 addresses the slot the (one-origin) pointer currently designates. */
1108 1.104 thorpej cp0 = (cp + ipt->ipt_ptr - 1);
1109 1.1 cgd switch (ipt->ipt_flg) {
1110 1.1 cgd
1111 1.1 cgd case IPOPT_TS_TSONLY:
1112 1.1 cgd break;
1113 1.1 cgd
1114 1.1 cgd case IPOPT_TS_TSANDADDR:
/* Need room for an address followed by a timestamp. */
1115 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1116 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1117 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1118 1.114 itojun (u_char *)ip;
1119 1.1 cgd goto bad;
1120 1.114 itojun }
1121 1.13 mycroft ipaddr.sin_addr = dst;
1122 1.96 thorpej ia = ifatoia(ifaof_ifpforaddr(sintosa(&ipaddr),
1123 1.96 thorpej m->m_pkthdr.rcvif));
/* Lookup failed: this "continue" resumes the outer for loop, leaving the option untouched. */
1124 1.13 mycroft if (ia == 0)
1125 1.13 mycroft continue;
1126 1.104 thorpej bcopy(&ia->ia_addr.sin_addr,
1127 1.104 thorpej cp0, sizeof(struct in_addr));
1128 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1129 1.1 cgd break;
1130 1.1 cgd
1131 1.1 cgd case IPOPT_TS_PRESPEC:
1132 1.66 thorpej if (ipt->ipt_ptr - 1 + sizeof(n_time) +
1133 1.114 itojun sizeof(struct in_addr) > ipt->ipt_len) {
1134 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1135 1.114 itojun (u_char *)ip;
1136 1.1 cgd goto bad;
1137 1.114 itojun }
/* Only stamp when the address already stored in the slot is one of ours. */
1138 1.104 thorpej bcopy(cp0, &ipaddr.sin_addr,
1139 1.1 cgd sizeof(struct in_addr));
1140 1.96 thorpej if (ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)))
1141 1.96 thorpej == NULL)
1142 1.1 cgd continue;
1143 1.1 cgd ipt->ipt_ptr += sizeof(struct in_addr);
1144 1.1 cgd break;
1145 1.1 cgd
1146 1.1 cgd default:
1147 1.114 itojun /* XXX can't take &ipt->ipt_flg */
1148 1.114 itojun code = (u_char *)&ipt->ipt_ptr -
1149 1.114 itojun (u_char *)ip + 1;
1150 1.1 cgd goto bad;
1151 1.1 cgd }
/* Store the current time at the (possibly advanced) pointer and step past it. */
1152 1.1 cgd ntime = iptime();
1153 1.107 thorpej cp0 = (u_char *) &ntime; /* XXX grumble, GCC... */
1154 1.107 thorpej bcopy(cp0, (caddr_t)cp + ipt->ipt_ptr - 1,
1155 1.1 cgd sizeof(n_time));
1156 1.1 cgd ipt->ipt_ptr += sizeof(n_time);
1157 1.1 cgd }
1158 1.1 cgd }
/*
 * "forward" was set by a source-route option that installed a new,
 * non-multicast destination.  Forward only when ip_forwsrcrt is
 * non-zero; otherwise answer with a source-route-failed unreachable.
 */
1159 1.1 cgd if (forward) {
1160 1.26 thorpej if (ip_forwsrcrt == 0) {
1161 1.26 thorpej type = ICMP_UNREACH;
1162 1.26 thorpej code = ICMP_UNREACH_SRCFAIL;
1163 1.26 thorpej goto bad;
1164 1.26 thorpej }
1165 1.1 cgd ip_forward(m, 1);
1166 1.1 cgd return (1);
1167 1.13 mycroft }
1168 1.13 mycroft return (0);
1169 1.1 cgd bad:
/* The mbuf is handed to icmp_error(); the caller is told the packet is gone. */
1170 1.13 mycroft icmp_error(m, type, code, 0, 0);
1171 1.13 mycroft ipstat.ips_badoptions++;
1172 1.1 cgd return (1);
1173 1.1 cgd }
1174 1.1 cgd
1175 1.1 cgd /*
1176 1.1 cgd * Given address of next destination (final or next hop),
1177 1.1 cgd * return internet address info of interface to be used to get there.
1178 1.1 cgd */
1179 1.1 cgd struct in_ifaddr *
1180 1.1 cgd ip_rtaddr(dst)
1181 1.1 cgd struct in_addr dst;
1182 1.1 cgd {
1183 1.109 augustss struct sockaddr_in *sin;
1184 1.1 cgd
1185 1.19 mycroft sin = satosin(&ipforward_rt.ro_dst);
1186 1.1 cgd
1187 1.35 mycroft if (ipforward_rt.ro_rt == 0 || !in_hosteq(dst, sin->sin_addr)) {
1188 1.1 cgd if (ipforward_rt.ro_rt) {
1189 1.1 cgd RTFREE(ipforward_rt.ro_rt);
1190 1.1 cgd ipforward_rt.ro_rt = 0;
1191 1.1 cgd }
1192 1.1 cgd sin->sin_family = AF_INET;
1193 1.1 cgd sin->sin_len = sizeof(*sin);
1194 1.1 cgd sin->sin_addr = dst;
1195 1.1 cgd
1196 1.1 cgd rtalloc(&ipforward_rt);
1197 1.1 cgd }
1198 1.1 cgd if (ipforward_rt.ro_rt == 0)
1199 1.1 cgd return ((struct in_ifaddr *)0);
1200 1.19 mycroft return (ifatoia(ipforward_rt.ro_rt->rt_ifa));
1201 1.1 cgd }
1202 1.1 cgd
1203 1.1 cgd /*
1204 1.1 cgd * Save incoming source route for use in replies,
1205 1.1 cgd * to be picked up later by ip_srcroute if the receiver is interested.
1206 1.1 cgd */
1207 1.13 mycroft void
1208 1.1 cgd save_rte(option, dst)
1209 1.1 cgd u_char *option;
1210 1.1 cgd struct in_addr dst;
1211 1.1 cgd {
1212 1.1 cgd unsigned olen;
1213 1.1 cgd
1214 1.1 cgd olen = option[IPOPT_OLEN];
1215 1.1 cgd #ifdef DIAGNOSTIC
1216 1.1 cgd if (ipprintfs)
1217 1.39 christos printf("save_rte: olen %d\n", olen);
1218 1.89 itojun #endif /* 0 */
1219 1.1 cgd if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
1220 1.1 cgd return;
1221 1.1 cgd bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
1222 1.1 cgd ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
1223 1.1 cgd ip_srcrt.dst = dst;
1224 1.1 cgd }
1225 1.1 cgd
1226 1.1 cgd /*
1227 1.1 cgd * Retrieve incoming source route for use in replies,
1228 1.1 cgd * in the same form used by setsockopt.
1229 1.1 cgd * The first hop is placed before the options, will be removed later.
1230 1.1 cgd */
1231 1.1 cgd struct mbuf *
1232 1.1 cgd ip_srcroute()
1233 1.1 cgd {
1234 1.109 augustss struct in_addr *p, *q;
1235 1.109 augustss struct mbuf *m;
1236 1.1 cgd
1237 1.1 cgd if (ip_nhops == 0)
1238 1.1 cgd return ((struct mbuf *)0);
1239 1.1 cgd m = m_get(M_DONTWAIT, MT_SOOPTS);
1240 1.1 cgd if (m == 0)
1241 1.1 cgd return ((struct mbuf *)0);
1242 1.1 cgd
1243 1.13 mycroft #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
1244 1.1 cgd
1245 1.1 cgd /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
1246 1.1 cgd m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
1247 1.1 cgd OPTSIZ;
1248 1.1 cgd #ifdef DIAGNOSTIC
1249 1.1 cgd if (ipprintfs)
1250 1.39 christos printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
1251 1.1 cgd #endif
1252 1.1 cgd
1253 1.1 cgd /*
1254 1.1 cgd * First save first hop for return route
1255 1.1 cgd */
1256 1.1 cgd p = &ip_srcrt.route[ip_nhops - 1];
1257 1.1 cgd *(mtod(m, struct in_addr *)) = *p--;
1258 1.1 cgd #ifdef DIAGNOSTIC
1259 1.1 cgd if (ipprintfs)
1260 1.39 christos printf(" hops %x", ntohl(mtod(m, struct in_addr *)->s_addr));
1261 1.1 cgd #endif
1262 1.1 cgd
1263 1.1 cgd /*
1264 1.1 cgd * Copy option fields and padding (nop) to mbuf.
1265 1.1 cgd */
1266 1.1 cgd ip_srcrt.nop = IPOPT_NOP;
1267 1.1 cgd ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
1268 1.1 cgd bcopy((caddr_t)&ip_srcrt.nop,
1269 1.1 cgd mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
1270 1.1 cgd q = (struct in_addr *)(mtod(m, caddr_t) +
1271 1.1 cgd sizeof(struct in_addr) + OPTSIZ);
1272 1.1 cgd #undef OPTSIZ
1273 1.1 cgd /*
1274 1.1 cgd * Record return path as an IP source route,
1275 1.1 cgd * reversing the path (pointers are now aligned).
1276 1.1 cgd */
1277 1.1 cgd while (p >= ip_srcrt.route) {
1278 1.1 cgd #ifdef DIAGNOSTIC
1279 1.1 cgd if (ipprintfs)
1280 1.39 christos printf(" %x", ntohl(q->s_addr));
1281 1.1 cgd #endif
1282 1.1 cgd *q++ = *p--;
1283 1.1 cgd }
1284 1.1 cgd /*
1285 1.1 cgd * Last hop goes to final destination.
1286 1.1 cgd */
1287 1.1 cgd *q = ip_srcrt.dst;
1288 1.1 cgd #ifdef DIAGNOSTIC
1289 1.1 cgd if (ipprintfs)
1290 1.39 christos printf(" %x\n", ntohl(q->s_addr));
1291 1.1 cgd #endif
1292 1.1 cgd return (m);
1293 1.1 cgd }
1294 1.1 cgd
1295 1.1 cgd /*
1296 1.1 cgd * Strip out IP options, at higher
1297 1.1 cgd * level protocol in the kernel.
1298 1.1 cgd * Second argument is buffer to which options
1299 1.1 cgd * will be moved, and return value is their length.
1300 1.1 cgd * XXX should be deleted; last arg currently ignored.
1301 1.1 cgd */
1302 1.8 mycroft void
1303 1.1 cgd ip_stripoptions(m, mopt)
1304 1.109 augustss struct mbuf *m;
1305 1.1 cgd struct mbuf *mopt;
1306 1.1 cgd {
1307 1.109 augustss int i;
1308 1.1 cgd struct ip *ip = mtod(m, struct ip *);
1309 1.109 augustss caddr_t opts;
1310 1.1 cgd int olen;
1311 1.1 cgd
1312 1.79 mycroft olen = (ip->ip_hl << 2) - sizeof (struct ip);
1313 1.1 cgd opts = (caddr_t)(ip + 1);
1314 1.1 cgd i = m->m_len - (sizeof (struct ip) + olen);
1315 1.1 cgd bcopy(opts + olen, opts, (unsigned)i);
1316 1.1 cgd m->m_len -= olen;
1317 1.1 cgd if (m->m_flags & M_PKTHDR)
1318 1.1 cgd m->m_pkthdr.len -= olen;
1319 1.79 mycroft ip->ip_len -= olen;
1320 1.79 mycroft ip->ip_hl = sizeof (struct ip) >> 2;
1321 1.1 cgd }
1322 1.1 cgd
/*
 * Table of PRC_NCMDS errno values, four entries per row below.
 * NOTE(review): presumably indexed by PRC_* control command code to
 * obtain the errno reported for that command, with 0 meaning "no error
 * to report" -- confirm against the users of this table.
 */
1323 1.23 mycroft int inetctlerrmap[PRC_NCMDS] = {
1324 1.1 cgd 0, 0, 0, 0,
1325 1.1 cgd 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
1326 1.1 cgd EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
1327 1.1 cgd EMSGSIZE, EHOSTUNREACH, 0, 0,
1328 1.1 cgd 0, 0, 0, 0,
1329 1.1 cgd ENOPROTOOPT
1330 1.1 cgd };
1331 1.1 cgd
1332 1.1 cgd /*
1333 1.1 cgd * Forward a packet. If some error occurs return the sender
1334 1.1 cgd * an icmp packet. Note we can't always generate a meaningful
1335 1.1 cgd * icmp message because icmp doesn't have a large enough repertoire
1336 1.1 cgd * of codes and types.
1337 1.1 cgd *
1338 1.1 cgd * If not forwarding, just drop the packet. This could be confusing
1339 1.1 cgd * if ipforwarding was zero but some routing protocol was advancing
1340 1.1 cgd * us as a gateway to somewhere. However, we must let the routing
1341 1.1 cgd * protocol deal with that.
1342 1.1 cgd *
1343 1.1 cgd * The srcrt parameter indicates whether the packet is being forwarded
1344 1.1 cgd * via a source route.
 *
 * The mbuf m is always disposed of here: it is freed, handed to
 * icmp_error(), or handed to ip_output().  A copy of the leading bytes
 * (mcopy) is kept so an ICMP message can still be built after
 * ip_output() has taken m.
1345 1.1 cgd */
1346 1.13 mycroft void
1347 1.1 cgd ip_forward(m, srcrt)
1348 1.1 cgd struct mbuf *m;
1349 1.1 cgd int srcrt;
1350 1.1 cgd {
1351 1.109 augustss struct ip *ip = mtod(m, struct ip *);
1352 1.109 augustss struct sockaddr_in *sin;
1353 1.109 augustss struct rtentry *rt;
1354 1.28 christos int error, type = 0, code = 0;
1355 1.1 cgd struct mbuf *mcopy;
1356 1.13 mycroft n_long dest;
1357 1.13 mycroft struct ifnet *destifp;
1358 1.89 itojun #ifdef IPSEC
1359 1.89 itojun struct ifnet dummyifp;
1360 1.89 itojun #endif
1361 1.1 cgd
/* dest is passed to icmp_error(); it is only given a value for redirects below. */
1362 1.13 mycroft dest = 0;
1363 1.1 cgd #ifdef DIAGNOSTIC
1364 1.1 cgd if (ipprintfs)
1365 1.70 thorpej printf("forward: src %2.2x dst %2.2x ttl %x\n",
1366 1.70 thorpej ntohl(ip->ip_src.s_addr),
1367 1.70 thorpej ntohl(ip->ip_dst.s_addr), ip->ip_ttl);
1368 1.1 cgd #endif
/* Link-level broadcast/multicast and non-forwardable destinations are dropped silently. */
1369 1.93 sommerfe if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
1370 1.1 cgd ipstat.ips_cantforward++;
1371 1.1 cgd m_freem(m);
1372 1.1 cgd return;
1373 1.1 cgd }
/* TTL would not survive the decrement: report "time exceeded in transit". */
1374 1.1 cgd if (ip->ip_ttl <= IPTTLDEC) {
1375 1.13 mycroft icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1376 1.1 cgd return;
1377 1.1 cgd }
1378 1.1 cgd ip->ip_ttl -= IPTTLDEC;
1379 1.1 cgd
/*
 * Reuse the cached forwarding route when it was resolved for this
 * destination; otherwise drop it and look up a new one.
 */
1380 1.19 mycroft sin = satosin(&ipforward_rt.ro_dst);
1381 1.1 cgd if ((rt = ipforward_rt.ro_rt) == 0 ||
1382 1.35 mycroft !in_hosteq(ip->ip_dst, sin->sin_addr)) {
1383 1.1 cgd if (ipforward_rt.ro_rt) {
1384 1.1 cgd RTFREE(ipforward_rt.ro_rt);
1385 1.1 cgd ipforward_rt.ro_rt = 0;
1386 1.1 cgd }
1387 1.1 cgd sin->sin_family = AF_INET;
1388 1.35 mycroft sin->sin_len = sizeof(struct sockaddr_in);
1389 1.1 cgd sin->sin_addr = ip->ip_dst;
1390 1.1 cgd
1391 1.1 cgd rtalloc(&ipforward_rt);
1392 1.1 cgd if (ipforward_rt.ro_rt == 0) {
1393 1.13 mycroft icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1394 1.1 cgd return;
1395 1.1 cgd }
1396 1.1 cgd rt = ipforward_rt.ro_rt;
1397 1.1 cgd }
1398 1.1 cgd
1399 1.1 cgd /*
1400 1.34 mycroft * Save at most 68 bytes of the packet in case
1401 1.1 cgd * we need to generate an ICMP message to the src.
1402 1.1 cgd */
/* mcopy is tested against NULL before every later use. */
1403 1.34 mycroft mcopy = m_copy(m, 0, imin((int)ip->ip_len, 68));
1404 1.1 cgd
1405 1.1 cgd /*
1406 1.1 cgd * If forwarding packet using same interface that it came in on,
1407 1.1 cgd * perhaps should send a redirect to sender to shortcut a hop.
1408 1.1 cgd * Only send redirect if source is sending directly to us,
1409 1.1 cgd * and if packet was not source routed (or has any options).
1410 1.1 cgd * Also, don't send redirect if forwarding using a default route
1411 1.1 cgd * or a route modified by a redirect.
1412 1.1 cgd */
1413 1.1 cgd if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1414 1.1 cgd (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1415 1.35 mycroft !in_nullhost(satosin(rt_key(rt))->sin_addr) &&
1416 1.1 cgd ipsendredirects && !srcrt) {
/* The sender must be on the subnet of the outgoing interface address. */
1417 1.19 mycroft if (rt->rt_ifa &&
1418 1.19 mycroft (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1419 1.19 mycroft ifatoia(rt->rt_ifa)->ia_subnet) {
/* Redirect target: the route's gateway if it has one, else the destination itself. */
1420 1.77 thorpej if (rt->rt_flags & RTF_GATEWAY)
1421 1.77 thorpej dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1422 1.77 thorpej else
1423 1.77 thorpej dest = ip->ip_dst.s_addr;
1424 1.77 thorpej /*
1425 1.77 thorpej * Router requirements says to only send host
1426 1.77 thorpej * redirects.
1427 1.77 thorpej */
1428 1.77 thorpej type = ICMP_REDIRECT;
1429 1.77 thorpej code = ICMP_REDIRECT_HOST;
1430 1.1 cgd #ifdef DIAGNOSTIC
1431 1.77 thorpej if (ipprintfs)
1432 1.77 thorpej printf("redirect (%d) to %x\n", code,
1433 1.77 thorpej (u_int32_t)dest);
1434 1.1 cgd #endif
1435 1.1 cgd }
1436 1.1 cgd }
1437 1.1 cgd
1438 1.89 itojun #ifdef IPSEC
1439 1.103 itojun /* Don't lookup socket in forwarding case */
1440 1.103 itojun ipsec_setsocket(m, NULL);
1441 1.103 itojun #endif
1442 1.27 thorpej error = ip_output(m, (struct mbuf *)0, &ipforward_rt,
1443 1.27 thorpej (IP_FORWARDING | (ip_directedbcast ? IP_ALLOWBROADCAST : 0)), 0);
/*
 * Success with no redirect pending is the common exit: release mcopy
 * and return.  Every other combination falls through to the ICMP
 * generation below.
 */
1444 1.1 cgd if (error)
1445 1.1 cgd ipstat.ips_cantforward++;
1446 1.1 cgd else {
1447 1.1 cgd ipstat.ips_forward++;
1448 1.1 cgd if (type)
1449 1.1 cgd ipstat.ips_redirectsent++;
1450 1.1 cgd else {
1451 1.63 matt if (mcopy) {
1452 1.63 matt #ifdef GATEWAY
1453 1.64 thorpej if (mcopy->m_flags & M_CANFASTFWD)
1454 1.64 thorpej ipflow_create(&ipforward_rt, mcopy);
1455 1.63 matt #endif
1456 1.1 cgd m_freem(mcopy);
1457 1.63 matt }
1458 1.1 cgd return;
1459 1.1 cgd }
1460 1.1 cgd }
/* Without the saved copy there is nothing to build an ICMP message from. */
1461 1.1 cgd if (mcopy == NULL)
1462 1.1 cgd return;
1463 1.13 mycroft destifp = NULL;
1464 1.13 mycroft
/* Translate the ip_output() result into an ICMP type and code. */
1465 1.1 cgd switch (error) {
1466 1.1 cgd
1467 1.1 cgd case 0: /* forwarded, but need redirect */
1468 1.1 cgd /* type, code set above */
1469 1.1 cgd break;
1470 1.1 cgd
1471 1.1 cgd case ENETUNREACH: /* shouldn't happen, checked above */
1472 1.1 cgd case EHOSTUNREACH:
1473 1.1 cgd case ENETDOWN:
1474 1.1 cgd case EHOSTDOWN:
1475 1.1 cgd default:
1476 1.1 cgd type = ICMP_UNREACH;
1477 1.1 cgd code = ICMP_UNREACH_HOST;
1478 1.1 cgd break;
1479 1.1 cgd
1480 1.1 cgd case EMSGSIZE:
1481 1.1 cgd type = ICMP_UNREACH;
1482 1.1 cgd code = ICMP_UNREACH_NEEDFRAG;
/* destifp is handed to icmp_error() below; it stays NULL when no interface is known. */
1483 1.89 itojun #ifndef IPSEC
1484 1.13 mycroft if (ipforward_rt.ro_rt)
1485 1.13 mycroft destifp = ipforward_rt.ro_rt->rt_ifp;
1486 1.89 itojun #else
1487 1.89 itojun /*
1488 1.89 itojun * If the packet is routed over IPsec tunnel, tell the
1489 1.89 itojun * originator the tunnel MTU.
1490 1.89 itojun * tunnel MTU = if MTU - sizeof(IP) - ESP/AH hdrsiz
1491 1.89 itojun * XXX quickhack!!!
1492 1.89 itojun */
1493 1.89 itojun if (ipforward_rt.ro_rt) {
1494 1.89 itojun struct secpolicy *sp;
1495 1.89 itojun int ipsecerror;
1496 1.95 itojun size_t ipsechdr;
1497 1.89 itojun struct route *ro;
1498 1.89 itojun
1499 1.89 itojun sp = ipsec4_getpolicybyaddr(mcopy,
1500 1.95 itojun IPSEC_DIR_OUTBOUND,
1501 1.95 itojun IP_FORWARDING,
1502 1.95 itojun &ipsecerror);
1503 1.89 itojun
/* No policy returned: use the outgoing interface, as in the non-IPSEC build. */
1504 1.89 itojun if (sp == NULL)
1505 1.89 itojun destifp = ipforward_rt.ro_rt->rt_ifp;
1506 1.89 itojun else {
1507 1.89 itojun /* count IPsec header size */
1508 1.95 itojun ipsechdr = ipsec4_hdrsiz(mcopy,
1509 1.95 itojun IPSEC_DIR_OUTBOUND,
1510 1.95 itojun NULL);
1511 1.89 itojun
1512 1.89 itojun /*
1513 1.89 itojun * find the correct route for outer IPv4
1514 1.89 itojun * header, compute tunnel MTU.
1515 1.89 itojun *
1516 1.89 itojun * XXX BUG ALERT
1517 1.89 itojun * The "dummyifp" code relies upon the fact
1518 1.89 itojun * that icmp_error() touches only ifp->if_mtu.
1519 1.89 itojun */
1520 1.89 itojun /*XXX*/
1521 1.89 itojun destifp = NULL;
1522 1.89 itojun if (sp->req != NULL
1523 1.95 itojun && sp->req->sav != NULL
1524 1.95 itojun && sp->req->sav->sah != NULL) {
1525 1.95 itojun ro = &sp->req->sav->sah->sa_route;
1526 1.89 itojun if (ro->ro_rt && ro->ro_rt->rt_ifp) {
/* Only if_mtu of dummyifp is filled in; the rest of the structure is left unset. */
1527 1.89 itojun dummyifp.if_mtu =
1528 1.89 itojun ro->ro_rt->rt_ifp->if_mtu;
1529 1.89 itojun dummyifp.if_mtu -= ipsechdr;
1530 1.89 itojun destifp = &dummyifp;
1531 1.89 itojun }
1532 1.89 itojun }
1533 1.89 itojun
1534 1.89 itojun key_freesp(sp);
1535 1.89 itojun }
1536 1.89 itojun }
1537 1.89 itojun #endif /*IPSEC*/
1538 1.1 cgd ipstat.ips_cantfrag++;
1539 1.1 cgd break;
1540 1.1 cgd
1541 1.1 cgd case ENOBUFS:
1542 1.1 cgd type = ICMP_SOURCEQUENCH;
1543 1.1 cgd code = 0;
1544 1.1 cgd break;
1545 1.1 cgd }
/* mcopy is handed to icmp_error() here and is not freed by this function afterwards. */
1546 1.13 mycroft icmp_error(mcopy, type, code, dest, destifp);
1547 1.44 thorpej }
1548 1.44 thorpej
1549 1.44 thorpej void
1550 1.44 thorpej ip_savecontrol(inp, mp, ip, m)
1551 1.109 augustss struct inpcb *inp;
1552 1.109 augustss struct mbuf **mp;
1553 1.109 augustss struct ip *ip;
1554 1.109 augustss struct mbuf *m;
1555 1.44 thorpej {
1556 1.44 thorpej
1557 1.44 thorpej if (inp->inp_socket->so_options & SO_TIMESTAMP) {
1558 1.44 thorpej struct timeval tv;
1559 1.44 thorpej
1560 1.44 thorpej microtime(&tv);
1561 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
1562 1.44 thorpej SCM_TIMESTAMP, SOL_SOCKET);
1563 1.44 thorpej if (*mp)
1564 1.44 thorpej mp = &(*mp)->m_next;
1565 1.44 thorpej }
1566 1.44 thorpej if (inp->inp_flags & INP_RECVDSTADDR) {
1567 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
1568 1.44 thorpej sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
1569 1.44 thorpej if (*mp)
1570 1.44 thorpej mp = &(*mp)->m_next;
1571 1.44 thorpej }
1572 1.44 thorpej #ifdef notyet
1573 1.44 thorpej /*
1574 1.44 thorpej * XXX
1575 1.44 thorpej * Moving these out of udp_input() made them even more broken
1576 1.44 thorpej * than they already were.
1577 1.44 thorpej * - fenner (at) parc.xerox.com
1578 1.44 thorpej */
1579 1.44 thorpej /* options were tossed already */
1580 1.44 thorpej if (inp->inp_flags & INP_RECVOPTS) {
1581 1.44 thorpej *mp = sbcreatecontrol((caddr_t) opts_deleted_above,
1582 1.44 thorpej sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
1583 1.44 thorpej if (*mp)
1584 1.44 thorpej mp = &(*mp)->m_next;
1585 1.44 thorpej }
1586 1.44 thorpej /* ip_srcroute doesn't do what we want here, need to fix */
1587 1.44 thorpej if (inp->inp_flags & INP_RECVRETOPTS) {
1588 1.44 thorpej *mp = sbcreatecontrol((caddr_t) ip_srcroute(),
1589 1.44 thorpej sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
1590 1.44 thorpej if (*mp)
1591 1.44 thorpej mp = &(*mp)->m_next;
1592 1.44 thorpej }
1593 1.44 thorpej #endif
1594 1.44 thorpej if (inp->inp_flags & INP_RECVIF) {
1595 1.44 thorpej struct sockaddr_dl sdl;
1596 1.44 thorpej
1597 1.44 thorpej sdl.sdl_len = offsetof(struct sockaddr_dl, sdl_data[0]);
1598 1.44 thorpej sdl.sdl_family = AF_LINK;
1599 1.44 thorpej sdl.sdl_index = m->m_pkthdr.rcvif ?
1600 1.44 thorpej m->m_pkthdr.rcvif->if_index : 0;
1601 1.44 thorpej sdl.sdl_nlen = sdl.sdl_alen = sdl.sdl_slen = 0;
1602 1.44 thorpej *mp = sbcreatecontrol((caddr_t) &sdl, sdl.sdl_len,
1603 1.44 thorpej IP_RECVIF, IPPROTO_IP);
1604 1.44 thorpej if (*mp)
1605 1.44 thorpej mp = &(*mp)->m_next;
1606 1.44 thorpej }
1607 1.13 mycroft }
1608 1.13 mycroft
1609 1.13 mycroft int
1610 1.13 mycroft ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1611 1.13 mycroft int *name;
1612 1.13 mycroft u_int namelen;
1613 1.13 mycroft void *oldp;
1614 1.13 mycroft size_t *oldlenp;
1615 1.13 mycroft void *newp;
1616 1.13 mycroft size_t newlen;
1617 1.13 mycroft {
1618 1.88 sommerfe extern int subnetsarelocal, hostzeroisbroadcast;
1619 1.52 thorpej
1620 1.54 lukem int error, old;
1621 1.54 lukem
1622 1.13 mycroft /* All sysctl names at this level are terminal. */
1623 1.13 mycroft if (namelen != 1)
1624 1.13 mycroft return (ENOTDIR);
1625 1.13 mycroft
1626 1.13 mycroft switch (name[0]) {
1627 1.13 mycroft case IPCTL_FORWARDING:
1628 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1629 1.13 mycroft case IPCTL_SENDREDIRECTS:
1630 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen,
1631 1.13 mycroft &ipsendredirects));
1632 1.13 mycroft case IPCTL_DEFTTL:
1633 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1634 1.13 mycroft #ifdef notyet
1635 1.13 mycroft case IPCTL_DEFMTU:
1636 1.13 mycroft return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1637 1.13 mycroft #endif
1638 1.26 thorpej case IPCTL_FORWSRCRT:
1639 1.47 cjs /* Don't allow this to change in a secure environment. */
1640 1.26 thorpej if (securelevel > 0)
1641 1.46 cjs return (sysctl_rdint(oldp, oldlenp, newp,
1642 1.46 cjs ip_forwsrcrt));
1643 1.46 cjs else
1644 1.46 cjs return (sysctl_int(oldp, oldlenp, newp, newlen,
1645 1.46 cjs &ip_forwsrcrt));
1646 1.27 thorpej case IPCTL_DIRECTEDBCAST:
1647 1.27 thorpej return (sysctl_int(oldp, oldlenp, newp, newlen,
1648 1.27 thorpej &ip_directedbcast));
1649 1.47 cjs case IPCTL_ALLOWSRCRT:
1650 1.47 cjs return (sysctl_int(oldp, oldlenp, newp, newlen,
1651 1.47 cjs &ip_allowsrcrt));
1652 1.52 thorpej case IPCTL_SUBNETSARELOCAL:
1653 1.52 thorpej return (sysctl_int(oldp, oldlenp, newp, newlen,
1654 1.52 thorpej &subnetsarelocal));
1655 1.53 kml case IPCTL_MTUDISC:
1656 1.60 kml error = sysctl_int(oldp, oldlenp, newp, newlen,
1657 1.60 kml &ip_mtudisc);
1658 1.60 kml if (ip_mtudisc != 0 && ip_mtudisc_timeout_q == NULL) {
1659 1.60 kml ip_mtudisc_timeout_q =
1660 1.60 kml rt_timer_queue_create(ip_mtudisc_timeout);
1661 1.60 kml } else if (ip_mtudisc == 0 && ip_mtudisc_timeout_q != NULL) {
1662 1.60 kml rt_timer_queue_destroy(ip_mtudisc_timeout_q, TRUE);
1663 1.60 kml ip_mtudisc_timeout_q = NULL;
1664 1.60 kml }
1665 1.60 kml return error;
1666 1.54 lukem case IPCTL_ANONPORTMIN:
1667 1.54 lukem old = anonportmin;
1668 1.54 lukem error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmin);
1669 1.54 lukem if (anonportmin >= anonportmax || anonportmin > 65535
1670 1.54 lukem #ifndef IPNOPRIVPORTS
1671 1.54 lukem || anonportmin < IPPORT_RESERVED
1672 1.54 lukem #endif
1673 1.54 lukem ) {
1674 1.54 lukem anonportmin = old;
1675 1.54 lukem return (EINVAL);
1676 1.54 lukem }
1677 1.54 lukem return (error);
1678 1.54 lukem case IPCTL_ANONPORTMAX:
1679 1.54 lukem old = anonportmax;
1680 1.54 lukem error = sysctl_int(oldp, oldlenp, newp, newlen, &anonportmax);
1681 1.54 lukem if (anonportmin >= anonportmax || anonportmax > 65535
1682 1.54 lukem #ifndef IPNOPRIVPORTS
1683 1.54 lukem || anonportmax < IPPORT_RESERVED
1684 1.54 lukem #endif
1685 1.54 lukem ) {
1686 1.54 lukem anonportmax = old;
1687 1.54 lukem return (EINVAL);
1688 1.54 lukem }
1689 1.60 kml return (error);
1690 1.60 kml case IPCTL_MTUDISCTIMEOUT:
1691 1.60 kml error = sysctl_int(oldp, oldlenp, newp, newlen,
1692 1.60 kml &ip_mtudisc_timeout);
1693 1.60 kml if (ip_mtudisc_timeout_q != NULL)
1694 1.60 kml rt_timer_queue_change(ip_mtudisc_timeout_q,
1695 1.60 kml ip_mtudisc_timeout);
1696 1.54 lukem return (error);
1697 1.65 matt #ifdef GATEWAY
1698 1.65 matt case IPCTL_MAXFLOWS:
1699 1.67 thorpej {
1700 1.67 thorpej int s;
1701 1.67 thorpej
1702 1.65 matt error = sysctl_int(oldp, oldlenp, newp, newlen,
1703 1.65 matt &ip_maxflows);
1704 1.67 thorpej s = splsoftnet();
1705 1.65 matt ipflow_reap(0);
1706 1.67 thorpej splx(s);
1707 1.65 matt return (error);
1708 1.67 thorpej }
1709 1.89 itojun #endif
1710 1.90 itojun case IPCTL_HOSTZEROBROADCAST:
1711 1.90 itojun return (sysctl_int(oldp, oldlenp, newp, newlen,
1712 1.90 itojun &hostzeroisbroadcast));
1713 1.89 itojun #if NGIF > 0
1714 1.89 itojun case IPCTL_GIF_TTL:
1715 1.89 itojun return(sysctl_int(oldp, oldlenp, newp, newlen,
1716 1.90 itojun &ip_gif_ttl));
1717 1.65 matt #endif
1718 1.88 sommerfe
1719 1.13 mycroft default:
1720 1.13 mycroft return (EOPNOTSUPP);
1721 1.13 mycroft }
1722 1.13 mycroft /* NOTREACHED */
1723 1.1 cgd }
1724