ip_input.c revision 1.26 1 /* $NetBSD: ip_input.c,v 1.26 1996/01/15 21:11:55 thorpej Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1988, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)ip_input.c 8.2 (Berkeley) 1/4/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/domain.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/errno.h>
46 #include <sys/time.h>
47 #include <sys/kernel.h>
48
49 #include <net/if.h>
50 #include <net/route.h>
51
52 #include <netinet/in.h>
53 #include <netinet/in_systm.h>
54 #include <netinet/ip.h>
55 #include <netinet/in_pcb.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip_var.h>
58 #include <netinet/ip_icmp.h>
59
/*
 * Compile-time defaults for the run-time tunables defined below.  Any of
 * the three macros may be predefined by the build; when IPFORWARDING is
 * not, forwarding defaults to on only if GATEWAY is defined.
 */
60 #ifndef IPFORWARDING
61 #ifdef GATEWAY
62 #define IPFORWARDING 1 /* forward IP packets not for us */
63 #else /* GATEWAY */
64 #define IPFORWARDING 0 /* don't forward IP packets not for us */
65 #endif /* GATEWAY */
66 #endif /* IPFORWARDING */
67 #ifndef IPSENDREDIRECTS
68 #define IPSENDREDIRECTS 1
69 #endif
70 #ifndef IPFORWSRCRT
71 #define IPFORWSRCRT 1 /* allow source-routed packets */
72 #endif
73 int ipforwarding = IPFORWARDING; /* ipintr forwards foreign packets only when non-zero */
74 int ipsendredirects = IPSENDREDIRECTS; /* ip_forward may generate redirects only when non-zero */
75 int ip_defttl = IPDEFTTL; /* exported through ip_sysctl (IPCTL_DEFTTL) */
76 int ip_forwsrcrt = IPFORWSRCRT; /* ip_dooptions refuses to forward source routes when zero */
77 #ifdef DIAGNOSTIC
78 int ipprintfs = 0; /* non-zero turns on the DIAGNOSTIC printf traces in this file */
79 #endif
80
81 extern struct domain inetdomain;
82 extern struct protosw inetsw[];
83 u_char ip_protox[IPPROTO_MAX]; /* protocol number -> index into inetsw[], built by ip_init */
84 int ipqmaxlen = IFQ_MAXLEN; /* copied into ipintrq.ifq_maxlen by ip_init */
85 struct in_ifaddrhead in_ifaddr; /* interface address list walked by ipintr */
86 struct ifqueue ipintrq; /* input queue drained by ipintr */
87
88 /*
89 * We need to save the IP options in case a protocol wants to respond
90 * to an incoming packet over the same route if the packet got here
91 * using IP source routing. This allows connection establishment and
92 * maintenance when the remote end is on a network that is not known
93 * to us.
94 */
95 int ip_nhops = 0; /* hops in the saved route; reset per packet by ipintr, 0 means none saved */
96 static struct ip_srcrt {
97 struct in_addr dst; /* final destination */
98 char nop; /* one NOP to align */
99 char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */
100 struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; /* hop addresses; save_rte's copy that starts at srcopt runs on into this array */
101 } ip_srcrt;
102
/* Stores a received source-route option and its sender in ip_srcrt. */
103 static void save_rte __P((u_char *, struct in_addr));
104 /*
105 * IP initialization: fill in IP protocol switch table.
106 * All protocols not implemented in kernel go to raw IP protocol handler.
107 */
108 void
109 ip_init()
110 {
111 register struct protosw *pr;
112 register int i;
113
114 pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
115 if (pr == 0)
116 panic("ip_init");
117 for (i = 0; i < IPPROTO_MAX; i++)
118 ip_protox[i] = pr - inetsw;
119 for (pr = inetdomain.dom_protosw;
120 pr < inetdomain.dom_protoswNPROTOSW; pr++)
121 if (pr->pr_domain->dom_family == PF_INET &&
122 pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
123 ip_protox[pr->pr_protocol] = pr - inetsw;
124 LIST_INIT(&ipq);
125 ip_id = time.tv_sec & 0xffff;
126 ipintrq.ifq_maxlen = ipqmaxlen;
127 TAILQ_INIT(&in_ifaddr);
128 }
129
/* Scratch AF_INET sockaddr; ip_dooptions fills in sin_addr for its interface lookups. */
130 struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
/* Single cached route shared by ip_forward and ip_rtaddr. */
131 struct route ipforward_rt;
132
133 /*
134 * Ip input routine. Checksum and byte swap header. If fragmented
135 * try to reassemble. Process options. Pass to next level.
136 */
137 void
138 ipintr()
139 {
140 register struct ip *ip;
141 register struct mbuf *m;
142 register struct ipq *fp;
143 register struct in_ifaddr *ia;
144 struct ipqent *ipqe;
145 int hlen, mff, s;
146
147 next:
148 /*
149 * Get next datagram off input queue and get IP header
150 * in first mbuf.
151 */
152 s = splimp();
153 IF_DEQUEUE(&ipintrq, m);
154 splx(s);
155 if (m == 0)
156 return;
157 #ifdef DIAGNOSTIC
158 if ((m->m_flags & M_PKTHDR) == 0)
159 panic("ipintr no HDR");
160 #endif
161 /*
162 * If no IP addresses have been set yet but the interfaces
163 * are receiving, can't do anything with incoming packets yet.
164 */
165 if (in_ifaddr.tqh_first == 0)
166 goto bad;
167 ipstat.ips_total++;
168 if (m->m_len < sizeof (struct ip) &&
169 (m = m_pullup(m, sizeof (struct ip))) == 0) {
170 ipstat.ips_toosmall++;
171 goto next;
172 }
173 ip = mtod(m, struct ip *);
174 if (ip->ip_v != IPVERSION) {
175 ipstat.ips_badvers++;
176 goto bad;
177 }
178 hlen = ip->ip_hl << 2;
179 if (hlen < sizeof(struct ip)) { /* minimum header length */
180 ipstat.ips_badhlen++;
181 goto bad;
182 }
183 if (hlen > m->m_len) {
184 if ((m = m_pullup(m, hlen)) == 0) {
185 ipstat.ips_badhlen++;
186 goto next;
187 }
188 ip = mtod(m, struct ip *);
189 }
190 if (ip->ip_sum = in_cksum(m, hlen)) {
191 ipstat.ips_badsum++;
192 goto bad;
193 }
194
195 /*
196 * Convert fields to host representation.
197 */
198 NTOHS(ip->ip_len);
199 if (ip->ip_len < hlen) {
200 ipstat.ips_badlen++;
201 goto bad;
202 }
203 NTOHS(ip->ip_id);
204 NTOHS(ip->ip_off);
205
206 /*
207 * Check that the amount of data in the buffers
208 * is as at least much as the IP header would have us expect.
209 * Trim mbufs if longer than we expect.
210 * Drop packet if shorter than we expect.
211 */
212 if (m->m_pkthdr.len < ip->ip_len) {
213 ipstat.ips_tooshort++;
214 goto bad;
215 }
216 if (m->m_pkthdr.len > ip->ip_len) {
217 if (m->m_len == m->m_pkthdr.len) {
218 m->m_len = ip->ip_len;
219 m->m_pkthdr.len = ip->ip_len;
220 } else
221 m_adj(m, ip->ip_len - m->m_pkthdr.len);
222 }
223
224 /*
225 * Process options and, if not destined for us,
226 * ship it on. ip_dooptions returns 1 when an
227 * error was detected (causing an icmp message
228 * to be sent and the original packet to be freed).
229 */
230 ip_nhops = 0; /* for source routed packets */
231 if (hlen > sizeof (struct ip) && ip_dooptions(m))
232 goto next;
233
234 /*
235 * Check our list of addresses, to see if the packet is for us.
236 */
237 for (ia = in_ifaddr.tqh_first; ia; ia = ia->ia_list.tqe_next) {
238 if (ip->ip_dst.s_addr == ia->ia_addr.sin_addr.s_addr)
239 goto ours;
240 if (
241 #ifdef DIRECTED_BROADCAST
242 ia->ia_ifp == m->m_pkthdr.rcvif &&
243 #endif
244 (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
245 if (ip->ip_dst.s_addr == ia->ia_broadaddr.sin_addr.s_addr ||
246 ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr ||
247 /*
248 * Look for all-0's host part (old broadcast addr),
249 * either for subnet or net.
250 */
251 ip->ip_dst.s_addr == ia->ia_subnet ||
252 ip->ip_dst.s_addr == ia->ia_net)
253 goto ours;
254 }
255 }
256 if (IN_MULTICAST(ip->ip_dst.s_addr)) {
257 struct in_multi *inm;
258 #ifdef MROUTING
259 extern struct socket *ip_mrouter;
260
261 if (m->m_flags & M_EXT) {
262 if ((m = m_pullup(m, hlen)) == 0) {
263 ipstat.ips_toosmall++;
264 goto next;
265 }
266 ip = mtod(m, struct ip *);
267 }
268
269 if (ip_mrouter) {
270 /*
271 * If we are acting as a multicast router, all
272 * incoming multicast packets are passed to the
273 * kernel-level multicast forwarding function.
274 * The packet is returned (relatively) intact; if
275 * ip_mforward() returns a non-zero value, the packet
276 * must be discarded, else it may be accepted below.
277 *
278 * (The IP ident field is put in the same byte order
279 * as expected when ip_mforward() is called from
280 * ip_output().)
281 */
282 ip->ip_id = htons(ip->ip_id);
283 if (ip_mforward(m, m->m_pkthdr.rcvif) != 0) {
284 ipstat.ips_cantforward++;
285 m_freem(m);
286 goto next;
287 }
288 ip->ip_id = ntohs(ip->ip_id);
289
290 /*
291 * The process-level routing demon needs to receive
292 * all multicast IGMP packets, whether or not this
293 * host belongs to their destination groups.
294 */
295 if (ip->ip_p == IPPROTO_IGMP)
296 goto ours;
297 ipstat.ips_forward++;
298 }
299 #endif
300 /*
301 * See if we belong to the destination multicast group on the
302 * arrival interface.
303 */
304 IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
305 if (inm == NULL) {
306 ipstat.ips_cantforward++;
307 m_freem(m);
308 goto next;
309 }
310 goto ours;
311 }
312 if (ip->ip_dst.s_addr == INADDR_BROADCAST ||
313 ip->ip_dst.s_addr == INADDR_ANY)
314 goto ours;
315
316 /*
317 * Not for us; forward if possible and desirable.
318 */
319 if (ipforwarding == 0) {
320 ipstat.ips_cantforward++;
321 m_freem(m);
322 } else
323 ip_forward(m, 0);
324 goto next;
325
326 ours:
327 /*
328 * If offset or IP_MF are set, must reassemble.
329 * Otherwise, nothing need be done.
330 * (We could look in the reassembly queue to see
331 * if the packet was previously fragmented,
332 * but it's not worth the time; just let them time out.)
333 */
334 if (ip->ip_off &~ IP_DF) {
335 if (m->m_flags & M_EXT) { /* XXX */
336 if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
337 ipstat.ips_toosmall++;
338 goto next;
339 }
340 ip = mtod(m, struct ip *);
341 }
342 /*
343 * Look for queue of fragments
344 * of this datagram.
345 */
346 for (fp = ipq.lh_first; fp != NULL; fp = fp->ipq_q.le_next)
347 if (ip->ip_id == fp->ipq_id &&
348 ip->ip_src.s_addr == fp->ipq_src.s_addr &&
349 ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
350 ip->ip_p == fp->ipq_p)
351 goto found;
352 fp = 0;
353 found:
354
355 /*
356 * Adjust ip_len to not reflect header,
357 * set ipqe_mff if more fragments are expected,
358 * convert offset of this to bytes.
359 */
360 ip->ip_len -= hlen;
361 mff = (ip->ip_off & IP_MF) != 0;
362 if (mff) {
363 /*
364 * Make sure that fragments have a data length
365 * that's a non-zero multiple of 8 bytes.
366 */
367 if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
368 ipstat.ips_badfrags++;
369 goto bad;
370 }
371 }
372 ip->ip_off <<= 3;
373
374 /*
375 * If datagram marked as having more fragments
376 * or if this is not the first fragment,
377 * attempt reassembly; if it succeeds, proceed.
378 */
379 if (mff || ip->ip_off) {
380 ipstat.ips_fragments++;
381 MALLOC(ipqe, struct ipqent *, sizeof (struct ipqent),
382 M_IPQ, M_NOWAIT);
383 if (ipqe == NULL) {
384 ipstat.ips_rcvmemdrop++;
385 goto bad;
386 }
387 ipqe->ipqe_mff = mff;
388 ipqe->ipqe_ip = ip;
389 ip = ip_reass(ipqe, fp);
390 if (ip == 0)
391 goto next;
392 ipstat.ips_reassembled++;
393 m = dtom(ip);
394 } else
395 if (fp)
396 ip_freef(fp);
397 } else
398 ip->ip_len -= hlen;
399
400 /*
401 * Switch out to protocol's input routine.
402 */
403 ipstat.ips_delivered++;
404 (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
405 goto next;
406 bad:
407 m_freem(m);
408 goto next;
409 }
410
411 /*
412 * Take incoming datagram fragment and try to
413 * reassemble it into whole datagram. If a chain for
414 * reassembly of this datagram already exists, then it
415 * is given as fp; otherwise have to make a chain.
416 */
417 struct ip *
418 ip_reass(ipqe, fp)
419 register struct ipqent *ipqe;
420 register struct ipq *fp;
421 {
422 register struct mbuf *m = dtom(ipqe->ipqe_ip);
423 register struct ipqent *nq, *p, *q;
424 struct ip *ip;
425 struct mbuf *t;
426 int hlen = ipqe->ipqe_ip->ip_hl << 2;
427 int i, next;
428
429 /*
430 * Presence of header sizes in mbufs
431 * would confuse code below.
432 */
433 m->m_data += hlen;
434 m->m_len -= hlen;
435
436 /*
437 * If first fragment to arrive, create a reassembly queue.
438 */
439 if (fp == 0) {
440 if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
441 goto dropfrag;
442 fp = mtod(t, struct ipq *);
443 LIST_INSERT_HEAD(&ipq, fp, ipq_q);
444 fp->ipq_ttl = IPFRAGTTL;
445 fp->ipq_p = ipqe->ipqe_ip->ip_p;
446 fp->ipq_id = ipqe->ipqe_ip->ip_id;
447 LIST_INIT(&fp->ipq_fragq);
448 fp->ipq_src = ipqe->ipqe_ip->ip_src;
449 fp->ipq_dst = ipqe->ipqe_ip->ip_dst;
450 p = NULL;
451 goto insert;
452 }
453
454 /*
455 * Find a segment which begins after this one does.
456 */
457 for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
458 p = q, q = q->ipqe_q.le_next)
459 if (q->ipqe_ip->ip_off > ipqe->ipqe_ip->ip_off)
460 break;
461
462 /*
463 * If there is a preceding segment, it may provide some of
464 * our data already. If so, drop the data from the incoming
465 * segment. If it provides all of our data, drop us.
466 */
467 if (p != NULL) {
468 i = p->ipqe_ip->ip_off + p->ipqe_ip->ip_len -
469 ipqe->ipqe_ip->ip_off;
470 if (i > 0) {
471 if (i >= ipqe->ipqe_ip->ip_len)
472 goto dropfrag;
473 m_adj(dtom(ipqe->ipqe_ip), i);
474 ipqe->ipqe_ip->ip_off += i;
475 ipqe->ipqe_ip->ip_len -= i;
476 }
477 }
478
479 /*
480 * While we overlap succeeding segments trim them or,
481 * if they are completely covered, dequeue them.
482 */
483 for (; q != NULL && ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len >
484 q->ipqe_ip->ip_off; q = nq) {
485 i = (ipqe->ipqe_ip->ip_off + ipqe->ipqe_ip->ip_len) -
486 q->ipqe_ip->ip_off;
487 if (i < q->ipqe_ip->ip_len) {
488 q->ipqe_ip->ip_len -= i;
489 q->ipqe_ip->ip_off += i;
490 m_adj(dtom(q->ipqe_ip), i);
491 break;
492 }
493 nq = q->ipqe_q.le_next;
494 m_freem(dtom(q->ipqe_ip));
495 LIST_REMOVE(q, ipqe_q);
496 FREE(q, M_IPQ);
497 }
498
499 insert:
500 /*
501 * Stick new segment in its place;
502 * check for complete reassembly.
503 */
504 if (p == NULL) {
505 LIST_INSERT_HEAD(&fp->ipq_fragq, ipqe, ipqe_q);
506 } else {
507 LIST_INSERT_AFTER(p, ipqe, ipqe_q);
508 }
509 next = 0;
510 for (p = NULL, q = fp->ipq_fragq.lh_first; q != NULL;
511 p = q, q = q->ipqe_q.le_next) {
512 if (q->ipqe_ip->ip_off != next)
513 return (0);
514 next += q->ipqe_ip->ip_len;
515 }
516 if (p->ipqe_mff)
517 return (0);
518
519 /*
520 * Reassembly is complete; concatenate fragments.
521 */
522 q = fp->ipq_fragq.lh_first;
523 ip = q->ipqe_ip;
524 m = dtom(q->ipqe_ip);
525 t = m->m_next;
526 m->m_next = 0;
527 m_cat(m, t);
528 nq = q->ipqe_q.le_next;
529 FREE(q, M_IPQ);
530 for (q = nq; q != NULL; q = nq) {
531 t = dtom(q->ipqe_ip);
532 nq = q->ipqe_q.le_next;
533 FREE(q, M_IPQ);
534 m_cat(m, t);
535 }
536
537 /*
538 * Create header for new ip packet by
539 * modifying header of first packet;
540 * dequeue and discard fragment reassembly header.
541 * Make header visible.
542 */
543 ip->ip_len = next;
544 ip->ip_src = fp->ipq_src;
545 ip->ip_dst = fp->ipq_dst;
546 LIST_REMOVE(fp, ipq_q);
547 (void) m_free(dtom(fp));
548 m->m_len += (ip->ip_hl << 2);
549 m->m_data -= (ip->ip_hl << 2);
550 /* some debugging cruft by sklower, below, will go away soon */
551 if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
552 register int plen = 0;
553 for (t = m; m; m = m->m_next)
554 plen += m->m_len;
555 t->m_pkthdr.len = plen;
556 }
557 return (ip);
558
559 dropfrag:
560 ipstat.ips_fragdropped++;
561 m_freem(m);
562 FREE(ipqe, M_IPQ);
563 return (0);
564 }
565
566 /*
567 * Free a fragment reassembly header and all
568 * associated datagrams.
569 */
570 void
571 ip_freef(fp)
572 struct ipq *fp;
573 {
574 register struct ipqent *q, *p;
575
576 for (q = fp->ipq_fragq.lh_first; q != NULL; q = p) {
577 p = q->ipqe_q.le_next;
578 m_freem(dtom(q->ipqe_ip));
579 LIST_REMOVE(q, ipqe_q);
580 FREE(q, M_IPQ);
581 }
582 LIST_REMOVE(fp, ipq_q);
583 (void) m_free(dtom(fp));
584 }
585
586 /*
587 * IP timer processing;
588 * if a timer expires on a reassembly
589 * queue, discard it.
590 */
591 void
592 ip_slowtimo()
593 {
594 register struct ipq *fp, *nfp;
595 int s = splsoftnet();
596
597 for (fp = ipq.lh_first; fp != NULL; fp = nfp) {
598 nfp = fp->ipq_q.le_next;
599 if (--fp->ipq_ttl == 0) {
600 ipstat.ips_fragtimeout++;
601 ip_freef(fp);
602 }
603 }
604 splx(s);
605 }
606
607 /*
608 * Drain off all datagram fragments.
609 */
610 void
611 ip_drain()
612 {
613
614 while (ipq.lh_first != NULL) {
615 ipstat.ips_fragdropped++;
616 ip_freef(ipq.lh_first);
617 }
618 }
619
620 /*
621 * Do option processing on a datagram,
622 * possibly discarding it if bad options are encountered,
623 * or forwarding it if source-routed.
624 * Returns 1 if packet has been forwarded/freed,
625 * 0 if the packet should be processed further.
626 */
627 int
628 ip_dooptions(m)
629 struct mbuf *m;
630 {
631 register struct ip *ip = mtod(m, struct ip *);
632 register u_char *cp;
633 register struct ip_timestamp *ipt;
634 register struct in_ifaddr *ia;
635 int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
636 struct in_addr *sin, dst;
637 n_time ntime;
638
639 dst = ip->ip_dst;
640 cp = (u_char *)(ip + 1);
641 cnt = (ip->ip_hl << 2) - sizeof (struct ip);
642 for (; cnt > 0; cnt -= optlen, cp += optlen) {
643 opt = cp[IPOPT_OPTVAL];
644 if (opt == IPOPT_EOL)
645 break;
646 if (opt == IPOPT_NOP)
647 optlen = 1;
648 else {
649 optlen = cp[IPOPT_OLEN];
650 if (optlen <= 0 || optlen > cnt) {
651 code = &cp[IPOPT_OLEN] - (u_char *)ip;
652 goto bad;
653 }
654 }
655 switch (opt) {
656
657 default:
658 break;
659
660 /*
661 * Source routing with record.
662 * Find interface with current destination address.
663 * If none on this machine then drop if strictly routed,
664 * or do nothing if loosely routed.
665 * Record interface address and bring up next address
666 * component. If strictly routed make sure next
667 * address is on directly accessible net.
668 */
669 case IPOPT_LSRR:
670 case IPOPT_SSRR:
671 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
672 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
673 goto bad;
674 }
675 ipaddr.sin_addr = ip->ip_dst;
676 ia = ifatoia(ifa_ifwithaddr(sintosa(&ipaddr)));
677 if (ia == 0) {
678 if (opt == IPOPT_SSRR) {
679 type = ICMP_UNREACH;
680 code = ICMP_UNREACH_SRCFAIL;
681 goto bad;
682 }
683 /*
684 * Loose routing, and not at next destination
685 * yet; nothing to do except forward.
686 */
687 break;
688 }
689 off--; /* 0 origin */
690 if (off > optlen - sizeof(struct in_addr)) {
691 /*
692 * End of source route. Should be for us.
693 */
694 save_rte(cp, ip->ip_src);
695 break;
696 }
697 /*
698 * locate outgoing interface
699 */
700 bcopy((caddr_t)(cp + off), (caddr_t)&ipaddr.sin_addr,
701 sizeof(ipaddr.sin_addr));
702 if (opt == IPOPT_SSRR) {
703 #define INA struct in_ifaddr *
704 #define SA struct sockaddr *
705 if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
706 ia = (INA)ifa_ifwithnet((SA)&ipaddr);
707 } else
708 ia = ip_rtaddr(ipaddr.sin_addr);
709 if (ia == 0) {
710 type = ICMP_UNREACH;
711 code = ICMP_UNREACH_SRCFAIL;
712 goto bad;
713 }
714 ip->ip_dst = ipaddr.sin_addr;
715 bcopy((caddr_t)&ia->ia_addr.sin_addr,
716 (caddr_t)(cp + off), sizeof(struct in_addr));
717 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
718 /*
719 * Let ip_intr's mcast routing check handle mcast pkts
720 */
721 forward = !IN_MULTICAST(ip->ip_dst.s_addr);
722 break;
723
724 case IPOPT_RR:
725 if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
726 code = &cp[IPOPT_OFFSET] - (u_char *)ip;
727 goto bad;
728 }
729 /*
730 * If no space remains, ignore.
731 */
732 off--; /* 0 origin */
733 if (off > optlen - sizeof(struct in_addr))
734 break;
735 bcopy((caddr_t)(&ip->ip_dst), (caddr_t)&ipaddr.sin_addr,
736 sizeof(ipaddr.sin_addr));
737 /*
738 * locate outgoing interface; if we're the destination,
739 * use the incoming interface (should be same).
740 */
741 if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
742 (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
743 type = ICMP_UNREACH;
744 code = ICMP_UNREACH_HOST;
745 goto bad;
746 }
747 bcopy((caddr_t)&ia->ia_addr.sin_addr,
748 (caddr_t)(cp + off), sizeof(struct in_addr));
749 cp[IPOPT_OFFSET] += sizeof(struct in_addr);
750 break;
751
752 case IPOPT_TS:
753 code = cp - (u_char *)ip;
754 ipt = (struct ip_timestamp *)cp;
755 if (ipt->ipt_len < 5)
756 goto bad;
757 if (ipt->ipt_ptr > ipt->ipt_len - sizeof (int32_t)) {
758 if (++ipt->ipt_oflw == 0)
759 goto bad;
760 break;
761 }
762 sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
763 switch (ipt->ipt_flg) {
764
765 case IPOPT_TS_TSONLY:
766 break;
767
768 case IPOPT_TS_TSANDADDR:
769 if (ipt->ipt_ptr + sizeof(n_time) +
770 sizeof(struct in_addr) > ipt->ipt_len)
771 goto bad;
772 ipaddr.sin_addr = dst;
773 ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
774 m->m_pkthdr.rcvif);
775 if (ia == 0)
776 continue;
777 bcopy((caddr_t)&ia->ia_addr.sin_addr,
778 (caddr_t)sin, sizeof(struct in_addr));
779 ipt->ipt_ptr += sizeof(struct in_addr);
780 break;
781
782 case IPOPT_TS_PRESPEC:
783 if (ipt->ipt_ptr + sizeof(n_time) +
784 sizeof(struct in_addr) > ipt->ipt_len)
785 goto bad;
786 bcopy((caddr_t)sin, (caddr_t)&ipaddr.sin_addr,
787 sizeof(struct in_addr));
788 if (ifa_ifwithaddr((SA)&ipaddr) == 0)
789 continue;
790 ipt->ipt_ptr += sizeof(struct in_addr);
791 break;
792
793 default:
794 goto bad;
795 }
796 ntime = iptime();
797 bcopy((caddr_t)&ntime, (caddr_t)cp + ipt->ipt_ptr - 1,
798 sizeof(n_time));
799 ipt->ipt_ptr += sizeof(n_time);
800 }
801 }
802 if (forward) {
803 if (ip_forwsrcrt == 0) {
804 type = ICMP_UNREACH;
805 code = ICMP_UNREACH_SRCFAIL;
806 goto bad;
807 }
808 ip_forward(m, 1);
809 return (1);
810 }
811 return (0);
812 bad:
813 ip->ip_len -= ip->ip_hl << 2; /* XXX icmp_error adds in hdr length */
814 icmp_error(m, type, code, 0, 0);
815 ipstat.ips_badoptions++;
816 return (1);
817 }
818
819 /*
820 * Given address of next destination (final or next hop),
821 * return internet address info of interface to be used to get there.
822 */
823 struct in_ifaddr *
824 ip_rtaddr(dst)
825 struct in_addr dst;
826 {
827 register struct sockaddr_in *sin;
828
829 sin = satosin(&ipforward_rt.ro_dst);
830
831 if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
832 if (ipforward_rt.ro_rt) {
833 RTFREE(ipforward_rt.ro_rt);
834 ipforward_rt.ro_rt = 0;
835 }
836 sin->sin_family = AF_INET;
837 sin->sin_len = sizeof(*sin);
838 sin->sin_addr = dst;
839
840 rtalloc(&ipforward_rt);
841 }
842 if (ipforward_rt.ro_rt == 0)
843 return ((struct in_ifaddr *)0);
844 return (ifatoia(ipforward_rt.ro_rt->rt_ifa));
845 }
846
847 /*
848 * Save incoming source route for use in replies,
849 * to be picked up later by ip_srcroute if the receiver is interested.
850 */
851 void
852 save_rte(option, dst)
853 u_char *option;
854 struct in_addr dst;
855 {
856 unsigned olen;
857
858 olen = option[IPOPT_OLEN];
859 #ifdef DIAGNOSTIC
860 if (ipprintfs)
861 printf("save_rte: olen %d\n", olen);
862 #endif
863 if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
864 return;
865 bcopy((caddr_t)option, (caddr_t)ip_srcrt.srcopt, olen);
866 ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
867 ip_srcrt.dst = dst;
868 }
869
870 /*
871 * Retrieve incoming source route for use in replies,
872 * in the same form used by setsockopt.
873 * The first hop is placed before the options, will be removed later.
874 */
875 struct mbuf *
876 ip_srcroute()
877 {
878 register struct in_addr *p, *q;
879 register struct mbuf *m;
880
881 if (ip_nhops == 0)
882 return ((struct mbuf *)0);
883 m = m_get(M_DONTWAIT, MT_SOOPTS);
884 if (m == 0)
885 return ((struct mbuf *)0);
886
887 #define OPTSIZ (sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
888
889 /* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
890 m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
891 OPTSIZ;
892 #ifdef DIAGNOSTIC
893 if (ipprintfs)
894 printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
895 #endif
896
897 /*
898 * First save first hop for return route
899 */
900 p = &ip_srcrt.route[ip_nhops - 1];
901 *(mtod(m, struct in_addr *)) = *p--;
902 #ifdef DIAGNOSTIC
903 if (ipprintfs)
904 printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
905 #endif
906
907 /*
908 * Copy option fields and padding (nop) to mbuf.
909 */
910 ip_srcrt.nop = IPOPT_NOP;
911 ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
912 bcopy((caddr_t)&ip_srcrt.nop,
913 mtod(m, caddr_t) + sizeof(struct in_addr), OPTSIZ);
914 q = (struct in_addr *)(mtod(m, caddr_t) +
915 sizeof(struct in_addr) + OPTSIZ);
916 #undef OPTSIZ
917 /*
918 * Record return path as an IP source route,
919 * reversing the path (pointers are now aligned).
920 */
921 while (p >= ip_srcrt.route) {
922 #ifdef DIAGNOSTIC
923 if (ipprintfs)
924 printf(" %lx", ntohl(q->s_addr));
925 #endif
926 *q++ = *p--;
927 }
928 /*
929 * Last hop goes to final destination.
930 */
931 *q = ip_srcrt.dst;
932 #ifdef DIAGNOSTIC
933 if (ipprintfs)
934 printf(" %lx\n", ntohl(q->s_addr));
935 #endif
936 return (m);
937 }
938
939 /*
940 * Strip out IP options, at higher
941 * level protocol in the kernel.
942 * Second argument is buffer to which options
943 * will be moved, and return value is their length.
944 * XXX should be deleted; last arg currently ignored.
945 */
946 void
947 ip_stripoptions(m, mopt)
948 register struct mbuf *m;
949 struct mbuf *mopt;
950 {
951 register int i;
952 struct ip *ip = mtod(m, struct ip *);
953 register caddr_t opts;
954 int olen;
955
956 olen = (ip->ip_hl<<2) - sizeof (struct ip);
957 opts = (caddr_t)(ip + 1);
958 i = m->m_len - (sizeof (struct ip) + olen);
959 bcopy(opts + olen, opts, (unsigned)i);
960 m->m_len -= olen;
961 if (m->m_flags & M_PKTHDR)
962 m->m_pkthdr.len -= olen;
963 ip->ip_hl = sizeof(struct ip) >> 2;
964 }
965
/*
 * Table of errno values with PRC_NCMDS entries; a 0 entry carries no
 * errno.  NOTE(review): presumably indexed by PRC_* control command and
 * consulted by the protocols' ctlinput routines -- confirm against
 * <sys/protosw.h> and the callers, neither of which is visible here.
 */
966 int inetctlerrmap[PRC_NCMDS] = {
967 0, 0, 0, 0,
968 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
969 EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
970 EMSGSIZE, EHOSTUNREACH, 0, 0,
971 0, 0, 0, 0,
972 ENOPROTOOPT
973 };
974
975 /*
976 * Forward a packet. If some error occurs return the sender
977 * an icmp packet. Note we can't always generate a meaningful
978 * icmp message because icmp doesn't have a large enough repertoire
979 * of codes and types.
980 *
981 * If not forwarding, just drop the packet. This could be confusing
982 * if ipforwarding was zero but some routing protocol was advancing
983 * us as a gateway to somewhere. However, we must let the routing
984 * protocol deal with that.
985 *
986 * The srcrt parameter indicates whether the packet is being forwarded
987 * via a source route.
988 */
989 void
990 ip_forward(m, srcrt)
991 struct mbuf *m;
992 int srcrt;
993 {
994 register struct ip *ip = mtod(m, struct ip *);
995 register struct sockaddr_in *sin;
996 register struct rtentry *rt;
997 int error, type = 0, code;
998 struct mbuf *mcopy;
999 n_long dest;
1000 struct ifnet *destifp;
1001
1002 dest = 0;
1003 #ifdef DIAGNOSTIC
1004 if (ipprintfs)
1005 printf("forward: src %x dst %x ttl %x\n", ip->ip_src,
1006 ip->ip_dst, ip->ip_ttl);
1007 #endif
1008 if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
1009 ipstat.ips_cantforward++;
1010 m_freem(m);
1011 return;
1012 }
1013 HTONS(ip->ip_id);
1014 if (ip->ip_ttl <= IPTTLDEC) {
1015 icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1016 return;
1017 }
1018 ip->ip_ttl -= IPTTLDEC;
1019
1020 sin = satosin(&ipforward_rt.ro_dst);
1021 if ((rt = ipforward_rt.ro_rt) == 0 ||
1022 ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1023 if (ipforward_rt.ro_rt) {
1024 RTFREE(ipforward_rt.ro_rt);
1025 ipforward_rt.ro_rt = 0;
1026 }
1027 sin->sin_family = AF_INET;
1028 sin->sin_len = sizeof(*sin);
1029 sin->sin_addr = ip->ip_dst;
1030
1031 rtalloc(&ipforward_rt);
1032 if (ipforward_rt.ro_rt == 0) {
1033 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1034 return;
1035 }
1036 rt = ipforward_rt.ro_rt;
1037 }
1038
1039 /*
1040 * Save at most 64 bytes of the packet in case
1041 * we need to generate an ICMP message to the src.
1042 */
1043 mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1044
1045 /*
1046 * If forwarding packet using same interface that it came in on,
1047 * perhaps should send a redirect to sender to shortcut a hop.
1048 * Only send redirect if source is sending directly to us,
1049 * and if packet was not source routed (or has any options).
1050 * Also, don't send redirect if forwarding using a default route
1051 * or a route modified by a redirect.
1052 */
1053 if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1054 (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1055 satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1056 ipsendredirects && !srcrt) {
1057 if (rt->rt_ifa &&
1058 (ip->ip_src.s_addr & ifatoia(rt->rt_ifa)->ia_subnetmask) ==
1059 ifatoia(rt->rt_ifa)->ia_subnet) {
1060 if (rt->rt_flags & RTF_GATEWAY)
1061 dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1062 else
1063 dest = ip->ip_dst.s_addr;
1064 /* Router requirements says to only send host redirects */
1065 type = ICMP_REDIRECT;
1066 code = ICMP_REDIRECT_HOST;
1067 #ifdef DIAGNOSTIC
1068 if (ipprintfs)
1069 printf("redirect (%d) to %lx\n", code, (u_int32_t)dest);
1070 #endif
1071 }
1072 }
1073
1074 error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
1075 #ifdef DIRECTED_BROADCAST
1076 | IP_ALLOWBROADCAST
1077 #endif
1078 , 0);
1079 if (error)
1080 ipstat.ips_cantforward++;
1081 else {
1082 ipstat.ips_forward++;
1083 if (type)
1084 ipstat.ips_redirectsent++;
1085 else {
1086 if (mcopy)
1087 m_freem(mcopy);
1088 return;
1089 }
1090 }
1091 if (mcopy == NULL)
1092 return;
1093 destifp = NULL;
1094
1095 switch (error) {
1096
1097 case 0: /* forwarded, but need redirect */
1098 /* type, code set above */
1099 break;
1100
1101 case ENETUNREACH: /* shouldn't happen, checked above */
1102 case EHOSTUNREACH:
1103 case ENETDOWN:
1104 case EHOSTDOWN:
1105 default:
1106 type = ICMP_UNREACH;
1107 code = ICMP_UNREACH_HOST;
1108 break;
1109
1110 case EMSGSIZE:
1111 type = ICMP_UNREACH;
1112 code = ICMP_UNREACH_NEEDFRAG;
1113 if (ipforward_rt.ro_rt)
1114 destifp = ipforward_rt.ro_rt->rt_ifp;
1115 ipstat.ips_cantfrag++;
1116 break;
1117
1118 case ENOBUFS:
1119 type = ICMP_SOURCEQUENCH;
1120 code = 0;
1121 break;
1122 }
1123 icmp_error(mcopy, type, code, dest, destifp);
1124 }
1125
1126 int
1127 ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1128 int *name;
1129 u_int namelen;
1130 void *oldp;
1131 size_t *oldlenp;
1132 void *newp;
1133 size_t newlen;
1134 {
1135 int temp;
1136
1137 /* All sysctl names at this level are terminal. */
1138 if (namelen != 1)
1139 return (ENOTDIR);
1140
1141 switch (name[0]) {
1142 case IPCTL_FORWARDING:
1143 return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1144 case IPCTL_SENDREDIRECTS:
1145 return (sysctl_int(oldp, oldlenp, newp, newlen,
1146 &ipsendredirects));
1147 case IPCTL_DEFTTL:
1148 return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1149 #ifdef notyet
1150 case IPCTL_DEFMTU:
1151 return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1152 #endif
1153 case IPCTL_FORWSRCRT:
1154 /*
1155 * Don't allow this to change in a secure environment.
1156 */
1157 if (securelevel > 0)
1158 return (EPERM);
1159 return (sysctl_int(oldp, oldlenp, newp, newlen,
1160 &ip_forwsrcrt));
1161 default:
1162 return (EOPNOTSUPP);
1163 }
1164 /* NOTREACHED */
1165 }
1166