raw_ip.c revision 1.70.2.1 1 /* $NetBSD: raw_ip.c,v 1.70.2.1 2003/07/02 15:27:00 darrenr Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.70.2.1 2003/07/02 15:27:00 darrenr Exp $");
69
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94
95 #include <machine/stdarg.h>
96
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100
/*
 * PCB table for raw IP sockets.  It is initialised by rip_init(),
 * populated through in_pcballoc() on PRU_ATTACH, and walked by
 * rip_input() for every datagram handed to the raw layer.
 */
struct inpcbtable rawcbtable;

/* Forward declarations for routines defined later in this file. */
int	 rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
    struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
int	 rip_bind __P((struct inpcb *, struct mbuf *));
int	 rip_connect __P((struct inpcb *, struct mbuf *));
void	 rip_disconnect __P((struct inpcb *));

/*
 * Nominal space allocated to a raw ip socket.
 * These are the initial values of rip_sendspace and rip_recvspace,
 * which rip_usrreq() passes to soreserve() when a socket is attached.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192
114
115 /*
116 * Raw interface to IP protocol.
117 */
118
119 /*
120 * Initialize raw connection block q.
121 */
122 void
123 rip_init()
124 {
125
126 in_pcbinit(&rawcbtable, 1, 1);
127 }
128
129 /*
130 * Setup generic address and protocol structures
131 * for raw_input routine, then pass them along with
132 * mbuf chain.
133 */
134 void
135 #if __STDC__
136 rip_input(struct mbuf *m, ...)
137 #else
138 rip_input(m, va_alist)
139 struct mbuf *m;
140 va_dcl
141 #endif
142 {
143 int proto;
144 struct ip *ip = mtod(m, struct ip *);
145 struct inpcb *inp;
146 struct inpcb *last = 0;
147 struct mbuf *opts = 0;
148 struct sockaddr_in ripsrc;
149 va_list ap;
150
151 va_start(ap, m);
152 (void)va_arg(ap, int); /* ignore value, advance ap */
153 proto = va_arg(ap, int);
154 va_end(ap);
155
156 ripsrc.sin_family = AF_INET;
157 ripsrc.sin_len = sizeof(struct sockaddr_in);
158 ripsrc.sin_addr = ip->ip_src;
159 ripsrc.sin_port = 0;
160 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
161
162 /*
163 * XXX Compatibility: programs using raw IP expect ip_len
164 * XXX to have the header length subtracted, and in host order.
165 * XXX ip_off is also expected to be host order.
166 */
167 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
168 NTOHS(ip->ip_off);
169
170 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
171 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
172 continue;
173 if (!in_nullhost(inp->inp_laddr) &&
174 !in_hosteq(inp->inp_laddr, ip->ip_dst))
175 continue;
176 if (!in_nullhost(inp->inp_faddr) &&
177 !in_hosteq(inp->inp_faddr, ip->ip_src))
178 continue;
179 if (last) {
180 struct mbuf *n;
181
182 #ifdef IPSEC
183 /* check AH/ESP integrity. */
184 if (ipsec4_in_reject_so(m, last->inp_socket)) {
185 ipsecstat.in_polvio++;
186 /* do not inject data to pcb */
187 } else
188 #endif /*IPSEC*/
189 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
190 if (last->inp_flags & INP_CONTROLOPTS ||
191 last->inp_socket->so_options & SO_TIMESTAMP)
192 ip_savecontrol(last, &opts, ip, n);
193 if (sbappendaddr(&last->inp_socket->so_rcv,
194 sintosa(&ripsrc), n, opts) == 0) {
195 /* should notify about lost packet */
196 m_freem(n);
197 if (opts)
198 m_freem(opts);
199 } else
200 sorwakeup(last->inp_socket);
201 opts = NULL;
202 }
203 }
204 last = inp;
205 }
206 #ifdef IPSEC
207 /* check AH/ESP integrity. */
208 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
209 m_freem(m);
210 ipsecstat.in_polvio++;
211 ipstat.ips_delivered--;
212 /* do not inject data to pcb */
213 } else
214 #endif /*IPSEC*/
215 if (last) {
216 if (last->inp_flags & INP_CONTROLOPTS ||
217 last->inp_socket->so_options & SO_TIMESTAMP)
218 ip_savecontrol(last, &opts, ip, m);
219 if (sbappendaddr(&last->inp_socket->so_rcv,
220 sintosa(&ripsrc), m, opts) == 0) {
221 m_freem(m);
222 if (opts)
223 m_freem(opts);
224 } else
225 sorwakeup(last->inp_socket);
226 } else {
227 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
228 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
229 0, 0);
230 ipstat.ips_noproto++;
231 ipstat.ips_delivered--;
232 } else
233 m_freem(m);
234 }
235 return;
236 }
237
238 int
239 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
240 struct inpcbtable *table;
241 struct in_addr faddr, laddr;
242 int proto;
243 int errno;
244 void (*notify) __P((struct inpcb *, int));
245 {
246 struct inpcb *inp, *ninp;
247 int nmatch;
248
249 nmatch = 0;
250 for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
251 inp != (struct inpcb *)&table->inpt_queue;
252 inp = ninp) {
253 ninp = inp->inp_queue.cqe_next;
254 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
255 continue;
256 if (in_hosteq(inp->inp_faddr, faddr) &&
257 in_hosteq(inp->inp_laddr, laddr)) {
258 (*notify)(inp, errno);
259 nmatch++;
260 }
261 }
262
263 return nmatch;
264 }
265
266 void *
267 rip_ctlinput(cmd, sa, v)
268 int cmd;
269 struct sockaddr *sa;
270 void *v;
271 {
272 struct ip *ip = v;
273 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
274 int errno;
275
276 if (sa->sa_family != AF_INET ||
277 sa->sa_len != sizeof(struct sockaddr_in))
278 return NULL;
279 if ((unsigned)cmd >= PRC_NCMDS)
280 return NULL;
281 errno = inetctlerrmap[cmd];
282 if (PRC_IS_REDIRECT(cmd))
283 notify = in_rtchange, ip = 0;
284 else if (cmd == PRC_HOSTDEAD)
285 ip = 0;
286 else if (errno == 0)
287 return NULL;
288 if (ip) {
289 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
290 ip->ip_src, ip->ip_p, errno, notify);
291
292 /* XXX mapped address case */
293 } else
294 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
295 notify);
296 return NULL;
297 }
298
299 /*
300 * Generate IP header and pass packet to ip_output.
301 * Tack on options user may have setup with control call.
302 */
303 int
304 #if __STDC__
305 rip_output(struct mbuf *m, ...)
306 #else
307 rip_output(m, va_alist)
308 struct mbuf *m;
309 va_dcl
310 #endif
311 {
312 struct inpcb *inp;
313 struct ip *ip;
314 struct mbuf *opts;
315 int flags;
316 va_list ap;
317
318 va_start(ap, m);
319 inp = va_arg(ap, struct inpcb *);
320 va_end(ap);
321
322 flags =
323 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
324 | IP_RETURNMTU;
325
326 /*
327 * If the user handed us a complete IP packet, use it.
328 * Otherwise, allocate an mbuf for a header and fill it in.
329 */
330 if ((inp->inp_flags & INP_HDRINCL) == 0) {
331 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
332 m_freem(m);
333 return (EMSGSIZE);
334 }
335 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
336 if (!m)
337 return (ENOBUFS);
338 ip = mtod(m, struct ip *);
339 ip->ip_tos = 0;
340 ip->ip_off = htons(0);
341 ip->ip_p = inp->inp_ip.ip_p;
342 ip->ip_len = htons(m->m_pkthdr.len);
343 ip->ip_src = inp->inp_laddr;
344 ip->ip_dst = inp->inp_faddr;
345 ip->ip_ttl = MAXTTL;
346 opts = inp->inp_options;
347 } else {
348 if (m->m_pkthdr.len > IP_MAXPACKET) {
349 m_freem(m);
350 return (EMSGSIZE);
351 }
352 ip = mtod(m, struct ip *);
353
354 /*
355 * If the mbuf is read-only, we need to allocate
356 * a new mbuf for the header, since we need to
357 * modify the header.
358 */
359 if (M_READONLY(m)) {
360 int hlen = ip->ip_hl << 2;
361
362 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3);
363 if (m == NULL)
364 return (ENOMEM); /* XXX */
365 ip = mtod(m, struct ip *);
366 }
367
368 /* XXX userland passes ip_len and ip_off in host order */
369 if (m->m_pkthdr.len != ip->ip_len) {
370 m_freem(m);
371 return (EINVAL);
372 }
373 HTONS(ip->ip_len);
374 HTONS(ip->ip_off);
375 if (ip->ip_id == 0)
376 ip->ip_id = htons(ip_id++);
377 opts = NULL;
378 /* XXX prevent ip_output from overwriting header fields */
379 flags |= IP_RAWOUTPUT;
380 ipstat.ips_rawout++;
381 }
382 #ifdef IPSEC
383 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
384 m_freem(m);
385 return ENOBUFS;
386 }
387 #endif /*IPSEC*/
388 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
389 &inp->inp_errormtu));
390 }
391
392 /*
393 * Raw IP socket option processing.
394 */
395 int
396 rip_ctloutput(op, so, level, optname, m)
397 int op;
398 struct socket *so;
399 int level, optname;
400 struct mbuf **m;
401 {
402 struct inpcb *inp = sotoinpcb(so);
403 int error = 0;
404
405 if (level != IPPROTO_IP) {
406 error = ENOPROTOOPT;
407 if (op == PRCO_SETOPT && *m != 0)
408 (void) m_free(*m);
409 } else switch (op) {
410
411 case PRCO_SETOPT:
412 switch (optname) {
413 case IP_HDRINCL:
414 if (*m == 0 || (*m)->m_len < sizeof (int))
415 error = EINVAL;
416 else {
417 if (*mtod(*m, int *))
418 inp->inp_flags |= INP_HDRINCL;
419 else
420 inp->inp_flags &= ~INP_HDRINCL;
421 }
422 if (*m != 0)
423 (void) m_free(*m);
424 break;
425
426 #ifdef MROUTING
427 case MRT_INIT:
428 case MRT_DONE:
429 case MRT_ADD_VIF:
430 case MRT_DEL_VIF:
431 case MRT_ADD_MFC:
432 case MRT_DEL_MFC:
433 case MRT_ASSERT:
434 error = ip_mrouter_set(so, optname, m);
435 break;
436 #endif
437
438 default:
439 error = ip_ctloutput(op, so, level, optname, m);
440 break;
441 }
442 break;
443
444 case PRCO_GETOPT:
445 switch (optname) {
446 case IP_HDRINCL:
447 *m = m_get(M_WAIT, MT_SOOPTS);
448 MCLAIM((*m), so->so_mowner);
449 (*m)->m_len = sizeof (int);
450 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
451 break;
452
453 #ifdef MROUTING
454 case MRT_VERSION:
455 case MRT_ASSERT:
456 error = ip_mrouter_get(so, optname, m);
457 break;
458 #endif
459
460 default:
461 error = ip_ctloutput(op, so, level, optname, m);
462 break;
463 }
464 break;
465 }
466 return (error);
467 }
468
469 int
470 rip_bind(inp, nam)
471 struct inpcb *inp;
472 struct mbuf *nam;
473 {
474 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
475
476 if (nam->m_len != sizeof(*addr))
477 return (EINVAL);
478 if (TAILQ_FIRST(&ifnet) == 0)
479 return (EADDRNOTAVAIL);
480 if (addr->sin_family != AF_INET &&
481 addr->sin_family != AF_IMPLINK)
482 return (EAFNOSUPPORT);
483 if (!in_nullhost(addr->sin_addr) &&
484 ifa_ifwithaddr(sintosa(addr)) == 0)
485 return (EADDRNOTAVAIL);
486 inp->inp_laddr = addr->sin_addr;
487 return (0);
488 }
489
490 int
491 rip_connect(inp, nam)
492 struct inpcb *inp;
493 struct mbuf *nam;
494 {
495 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
496
497 if (nam->m_len != sizeof(*addr))
498 return (EINVAL);
499 if (TAILQ_FIRST(&ifnet) == 0)
500 return (EADDRNOTAVAIL);
501 if (addr->sin_family != AF_INET &&
502 addr->sin_family != AF_IMPLINK)
503 return (EAFNOSUPPORT);
504 inp->inp_faddr = addr->sin_addr;
505 return (0);
506 }
507
508 void
509 rip_disconnect(inp)
510 struct inpcb *inp;
511 {
512
513 inp->inp_faddr = zeroin_addr;
514 }
515
/*
 * Send and receive buffer sizes that rip_usrreq() reserves with
 * soreserve() on PRU_ATTACH when either of the socket's high-water
 * marks is still zero.
 */
u_long	rip_sendspace = RIPSNDQ;
u_long	rip_recvspace = RIPRCVQ;
518
519 /*ARGSUSED*/
520 int
521 rip_usrreq(so, req, m, nam, control, l)
522 struct socket *so;
523 int req;
524 struct mbuf *m, *nam, *control;
525 struct lwp *l;
526 {
527 struct inpcb *inp;
528 struct proc *p;
529 int s;
530 int error = 0;
531 #ifdef MROUTING
532 extern struct socket *ip_mrouter;
533 #endif
534
535 p = l ? l->l_proc : NULL;
536 if (req == PRU_CONTROL)
537 return (in_control(so, (long)m, (caddr_t)nam,
538 (struct ifnet *)control, p));
539
540 if (req == PRU_PURGEIF) {
541 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
542 in_purgeif((struct ifnet *)control);
543 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
544 return (0);
545 }
546
547 s = splsoftnet();
548 inp = sotoinpcb(so);
549 #ifdef DIAGNOSTIC
550 if (req != PRU_SEND && req != PRU_SENDOOB && control)
551 panic("rip_usrreq: unexpected control mbuf");
552 #endif
553 if (inp == 0 && req != PRU_ATTACH) {
554 error = EINVAL;
555 goto release;
556 }
557
558 switch (req) {
559
560 case PRU_ATTACH:
561 if (inp != 0) {
562 error = EISCONN;
563 break;
564 }
565 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
566 error = EACCES;
567 break;
568 }
569 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
570 error = soreserve(so, rip_sendspace, rip_recvspace);
571 if (error)
572 break;
573 }
574 error = in_pcballoc(so, &rawcbtable);
575 if (error)
576 break;
577 inp = sotoinpcb(so);
578 inp->inp_ip.ip_p = (long)nam;
579 break;
580
581 case PRU_DETACH:
582 #ifdef MROUTING
583 if (so == ip_mrouter)
584 ip_mrouter_done();
585 #endif
586 in_pcbdetach(inp);
587 break;
588
589 case PRU_BIND:
590 error = rip_bind(inp, nam);
591 break;
592
593 case PRU_LISTEN:
594 error = EOPNOTSUPP;
595 break;
596
597 case PRU_CONNECT:
598 error = rip_connect(inp, nam);
599 if (error)
600 break;
601 soisconnected(so);
602 break;
603
604 case PRU_CONNECT2:
605 error = EOPNOTSUPP;
606 break;
607
608 case PRU_DISCONNECT:
609 soisdisconnected(so);
610 rip_disconnect(inp);
611 break;
612
613 /*
614 * Mark the connection as being incapable of further input.
615 */
616 case PRU_SHUTDOWN:
617 socantsendmore(so);
618 break;
619
620 case PRU_RCVD:
621 error = EOPNOTSUPP;
622 break;
623
624 /*
625 * Ship a packet out. The appropriate raw output
626 * routine handles any massaging necessary.
627 */
628 case PRU_SEND:
629 if (control && control->m_len) {
630 m_freem(control);
631 m_freem(m);
632 error = EINVAL;
633 break;
634 }
635 {
636 if (nam) {
637 if ((so->so_state & SS_ISCONNECTED) != 0) {
638 error = EISCONN;
639 goto die;
640 }
641 error = rip_connect(inp, nam);
642 if (error) {
643 die:
644 m_freem(m);
645 break;
646 }
647 } else {
648 if ((so->so_state & SS_ISCONNECTED) == 0) {
649 error = ENOTCONN;
650 goto die;
651 }
652 }
653 error = rip_output(m, inp);
654 if (nam)
655 rip_disconnect(inp);
656 }
657 break;
658
659 case PRU_SENSE:
660 /*
661 * stat: don't bother with a blocksize.
662 */
663 splx(s);
664 return (0);
665
666 case PRU_RCVOOB:
667 error = EOPNOTSUPP;
668 break;
669
670 case PRU_SENDOOB:
671 m_freem(control);
672 m_freem(m);
673 error = EOPNOTSUPP;
674 break;
675
676 case PRU_SOCKADDR:
677 in_setsockaddr(inp, nam);
678 break;
679
680 case PRU_PEERADDR:
681 in_setpeeraddr(inp, nam);
682 break;
683
684 default:
685 panic("rip_usrreq");
686 }
687
688 release:
689 splx(s);
690 return (error);
691 }
692