raw_ip.c revision 1.71 1 /* $NetBSD: raw_ip.c,v 1.71 2003/08/07 16:33:14 agc Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.71 2003/08/07 16:33:14 agc Exp $");
65
66 #include "opt_ipsec.h"
67 #include "opt_mrouting.h"
68
69 #include <sys/param.h>
70 #include <sys/malloc.h>
71 #include <sys/mbuf.h>
72 #include <sys/socket.h>
73 #include <sys/protosw.h>
74 #include <sys/socketvar.h>
75 #include <sys/errno.h>
76 #include <sys/systm.h>
77 #include <sys/proc.h>
78
79 #include <net/if.h>
80 #include <net/route.h>
81
82 #include <netinet/in.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/ip.h>
85 #include <netinet/ip_var.h>
86 #include <netinet/ip_mroute.h>
87 #include <netinet/ip_icmp.h>
88 #include <netinet/in_pcb.h>
89 #include <netinet/in_var.h>
90
91 #include <machine/stdarg.h>
92
93 #ifdef IPSEC
94 #include <netinet6/ipsec.h>
95 #endif /*IPSEC*/
96
97 struct inpcbtable rawcbtable;
98
99 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
100 struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
101 int rip_bind __P((struct inpcb *, struct mbuf *));
102 int rip_connect __P((struct inpcb *, struct mbuf *));
103 void rip_disconnect __P((struct inpcb *));
104
105 /*
106 * Nominal space allocated to a raw ip socket.
107 */
108 #define RIPSNDQ 8192
109 #define RIPRCVQ 8192
110
111 /*
112 * Raw interface to IP protocol.
113 */
114
115 /*
116 * Initialize raw connection block q.
117 */
118 void
119 rip_init()
120 {
121
122 in_pcbinit(&rawcbtable, 1, 1);
123 }
124
125 /*
126 * Setup generic address and protocol structures
127 * for raw_input routine, then pass them along with
128 * mbuf chain.
129 */
130 void
131 #if __STDC__
132 rip_input(struct mbuf *m, ...)
133 #else
134 rip_input(m, va_alist)
135 struct mbuf *m;
136 va_dcl
137 #endif
138 {
139 int proto;
140 struct ip *ip = mtod(m, struct ip *);
141 struct inpcb *inp;
142 struct inpcb *last = 0;
143 struct mbuf *opts = 0;
144 struct sockaddr_in ripsrc;
145 va_list ap;
146
147 va_start(ap, m);
148 (void)va_arg(ap, int); /* ignore value, advance ap */
149 proto = va_arg(ap, int);
150 va_end(ap);
151
152 ripsrc.sin_family = AF_INET;
153 ripsrc.sin_len = sizeof(struct sockaddr_in);
154 ripsrc.sin_addr = ip->ip_src;
155 ripsrc.sin_port = 0;
156 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
157
158 /*
159 * XXX Compatibility: programs using raw IP expect ip_len
160 * XXX to have the header length subtracted, and in host order.
161 * XXX ip_off is also expected to be host order.
162 */
163 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
164 NTOHS(ip->ip_off);
165
166 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
167 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
168 continue;
169 if (!in_nullhost(inp->inp_laddr) &&
170 !in_hosteq(inp->inp_laddr, ip->ip_dst))
171 continue;
172 if (!in_nullhost(inp->inp_faddr) &&
173 !in_hosteq(inp->inp_faddr, ip->ip_src))
174 continue;
175 if (last) {
176 struct mbuf *n;
177
178 #ifdef IPSEC
179 /* check AH/ESP integrity. */
180 if (ipsec4_in_reject_so(m, last->inp_socket)) {
181 ipsecstat.in_polvio++;
182 /* do not inject data to pcb */
183 } else
184 #endif /*IPSEC*/
185 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
186 if (last->inp_flags & INP_CONTROLOPTS ||
187 last->inp_socket->so_options & SO_TIMESTAMP)
188 ip_savecontrol(last, &opts, ip, n);
189 if (sbappendaddr(&last->inp_socket->so_rcv,
190 sintosa(&ripsrc), n, opts) == 0) {
191 /* should notify about lost packet */
192 m_freem(n);
193 if (opts)
194 m_freem(opts);
195 } else
196 sorwakeup(last->inp_socket);
197 opts = NULL;
198 }
199 }
200 last = inp;
201 }
202 #ifdef IPSEC
203 /* check AH/ESP integrity. */
204 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
205 m_freem(m);
206 ipsecstat.in_polvio++;
207 ipstat.ips_delivered--;
208 /* do not inject data to pcb */
209 } else
210 #endif /*IPSEC*/
211 if (last) {
212 if (last->inp_flags & INP_CONTROLOPTS ||
213 last->inp_socket->so_options & SO_TIMESTAMP)
214 ip_savecontrol(last, &opts, ip, m);
215 if (sbappendaddr(&last->inp_socket->so_rcv,
216 sintosa(&ripsrc), m, opts) == 0) {
217 m_freem(m);
218 if (opts)
219 m_freem(opts);
220 } else
221 sorwakeup(last->inp_socket);
222 } else {
223 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
224 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
225 0, 0);
226 ipstat.ips_noproto++;
227 ipstat.ips_delivered--;
228 } else
229 m_freem(m);
230 }
231 return;
232 }
233
234 int
235 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
236 struct inpcbtable *table;
237 struct in_addr faddr, laddr;
238 int proto;
239 int errno;
240 void (*notify) __P((struct inpcb *, int));
241 {
242 struct inpcb *inp, *ninp;
243 int nmatch;
244
245 nmatch = 0;
246 for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
247 inp != (struct inpcb *)&table->inpt_queue;
248 inp = ninp) {
249 ninp = inp->inp_queue.cqe_next;
250 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
251 continue;
252 if (in_hosteq(inp->inp_faddr, faddr) &&
253 in_hosteq(inp->inp_laddr, laddr)) {
254 (*notify)(inp, errno);
255 nmatch++;
256 }
257 }
258
259 return nmatch;
260 }
261
262 void *
263 rip_ctlinput(cmd, sa, v)
264 int cmd;
265 struct sockaddr *sa;
266 void *v;
267 {
268 struct ip *ip = v;
269 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
270 int errno;
271
272 if (sa->sa_family != AF_INET ||
273 sa->sa_len != sizeof(struct sockaddr_in))
274 return NULL;
275 if ((unsigned)cmd >= PRC_NCMDS)
276 return NULL;
277 errno = inetctlerrmap[cmd];
278 if (PRC_IS_REDIRECT(cmd))
279 notify = in_rtchange, ip = 0;
280 else if (cmd == PRC_HOSTDEAD)
281 ip = 0;
282 else if (errno == 0)
283 return NULL;
284 if (ip) {
285 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
286 ip->ip_src, ip->ip_p, errno, notify);
287
288 /* XXX mapped address case */
289 } else
290 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
291 notify);
292 return NULL;
293 }
294
295 /*
296 * Generate IP header and pass packet to ip_output.
297 * Tack on options user may have setup with control call.
298 */
299 int
300 #if __STDC__
301 rip_output(struct mbuf *m, ...)
302 #else
303 rip_output(m, va_alist)
304 struct mbuf *m;
305 va_dcl
306 #endif
307 {
308 struct inpcb *inp;
309 struct ip *ip;
310 struct mbuf *opts;
311 int flags;
312 va_list ap;
313
314 va_start(ap, m);
315 inp = va_arg(ap, struct inpcb *);
316 va_end(ap);
317
318 flags =
319 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
320 | IP_RETURNMTU;
321
322 /*
323 * If the user handed us a complete IP packet, use it.
324 * Otherwise, allocate an mbuf for a header and fill it in.
325 */
326 if ((inp->inp_flags & INP_HDRINCL) == 0) {
327 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
328 m_freem(m);
329 return (EMSGSIZE);
330 }
331 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
332 if (!m)
333 return (ENOBUFS);
334 ip = mtod(m, struct ip *);
335 ip->ip_tos = 0;
336 ip->ip_off = htons(0);
337 ip->ip_p = inp->inp_ip.ip_p;
338 ip->ip_len = htons(m->m_pkthdr.len);
339 ip->ip_src = inp->inp_laddr;
340 ip->ip_dst = inp->inp_faddr;
341 ip->ip_ttl = MAXTTL;
342 opts = inp->inp_options;
343 } else {
344 if (m->m_pkthdr.len > IP_MAXPACKET) {
345 m_freem(m);
346 return (EMSGSIZE);
347 }
348 ip = mtod(m, struct ip *);
349
350 /*
351 * If the mbuf is read-only, we need to allocate
352 * a new mbuf for the header, since we need to
353 * modify the header.
354 */
355 if (M_READONLY(m)) {
356 int hlen = ip->ip_hl << 2;
357
358 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3);
359 if (m == NULL)
360 return (ENOMEM); /* XXX */
361 ip = mtod(m, struct ip *);
362 }
363
364 /* XXX userland passes ip_len and ip_off in host order */
365 if (m->m_pkthdr.len != ip->ip_len) {
366 m_freem(m);
367 return (EINVAL);
368 }
369 HTONS(ip->ip_len);
370 HTONS(ip->ip_off);
371 if (ip->ip_id == 0)
372 ip->ip_id = htons(ip_id++);
373 opts = NULL;
374 /* XXX prevent ip_output from overwriting header fields */
375 flags |= IP_RAWOUTPUT;
376 ipstat.ips_rawout++;
377 }
378 #ifdef IPSEC
379 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
380 m_freem(m);
381 return ENOBUFS;
382 }
383 #endif /*IPSEC*/
384 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
385 &inp->inp_errormtu));
386 }
387
388 /*
389 * Raw IP socket option processing.
390 */
391 int
392 rip_ctloutput(op, so, level, optname, m)
393 int op;
394 struct socket *so;
395 int level, optname;
396 struct mbuf **m;
397 {
398 struct inpcb *inp = sotoinpcb(so);
399 int error = 0;
400
401 if (level != IPPROTO_IP) {
402 error = ENOPROTOOPT;
403 if (op == PRCO_SETOPT && *m != 0)
404 (void) m_free(*m);
405 } else switch (op) {
406
407 case PRCO_SETOPT:
408 switch (optname) {
409 case IP_HDRINCL:
410 if (*m == 0 || (*m)->m_len < sizeof (int))
411 error = EINVAL;
412 else {
413 if (*mtod(*m, int *))
414 inp->inp_flags |= INP_HDRINCL;
415 else
416 inp->inp_flags &= ~INP_HDRINCL;
417 }
418 if (*m != 0)
419 (void) m_free(*m);
420 break;
421
422 #ifdef MROUTING
423 case MRT_INIT:
424 case MRT_DONE:
425 case MRT_ADD_VIF:
426 case MRT_DEL_VIF:
427 case MRT_ADD_MFC:
428 case MRT_DEL_MFC:
429 case MRT_ASSERT:
430 error = ip_mrouter_set(so, optname, m);
431 break;
432 #endif
433
434 default:
435 error = ip_ctloutput(op, so, level, optname, m);
436 break;
437 }
438 break;
439
440 case PRCO_GETOPT:
441 switch (optname) {
442 case IP_HDRINCL:
443 *m = m_get(M_WAIT, MT_SOOPTS);
444 MCLAIM((*m), so->so_mowner);
445 (*m)->m_len = sizeof (int);
446 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
447 break;
448
449 #ifdef MROUTING
450 case MRT_VERSION:
451 case MRT_ASSERT:
452 error = ip_mrouter_get(so, optname, m);
453 break;
454 #endif
455
456 default:
457 error = ip_ctloutput(op, so, level, optname, m);
458 break;
459 }
460 break;
461 }
462 return (error);
463 }
464
465 int
466 rip_bind(inp, nam)
467 struct inpcb *inp;
468 struct mbuf *nam;
469 {
470 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
471
472 if (nam->m_len != sizeof(*addr))
473 return (EINVAL);
474 if (TAILQ_FIRST(&ifnet) == 0)
475 return (EADDRNOTAVAIL);
476 if (addr->sin_family != AF_INET &&
477 addr->sin_family != AF_IMPLINK)
478 return (EAFNOSUPPORT);
479 if (!in_nullhost(addr->sin_addr) &&
480 ifa_ifwithaddr(sintosa(addr)) == 0)
481 return (EADDRNOTAVAIL);
482 inp->inp_laddr = addr->sin_addr;
483 return (0);
484 }
485
486 int
487 rip_connect(inp, nam)
488 struct inpcb *inp;
489 struct mbuf *nam;
490 {
491 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
492
493 if (nam->m_len != sizeof(*addr))
494 return (EINVAL);
495 if (TAILQ_FIRST(&ifnet) == 0)
496 return (EADDRNOTAVAIL);
497 if (addr->sin_family != AF_INET &&
498 addr->sin_family != AF_IMPLINK)
499 return (EAFNOSUPPORT);
500 inp->inp_faddr = addr->sin_addr;
501 return (0);
502 }
503
504 void
505 rip_disconnect(inp)
506 struct inpcb *inp;
507 {
508
509 inp->inp_faddr = zeroin_addr;
510 }
511
512 u_long rip_sendspace = RIPSNDQ;
513 u_long rip_recvspace = RIPRCVQ;
514
515 /*ARGSUSED*/
516 int
517 rip_usrreq(so, req, m, nam, control, p)
518 struct socket *so;
519 int req;
520 struct mbuf *m, *nam, *control;
521 struct proc *p;
522 {
523 struct inpcb *inp;
524 int s;
525 int error = 0;
526 #ifdef MROUTING
527 extern struct socket *ip_mrouter;
528 #endif
529
530 if (req == PRU_CONTROL)
531 return (in_control(so, (long)m, (caddr_t)nam,
532 (struct ifnet *)control, p));
533
534 if (req == PRU_PURGEIF) {
535 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
536 in_purgeif((struct ifnet *)control);
537 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
538 return (0);
539 }
540
541 s = splsoftnet();
542 inp = sotoinpcb(so);
543 #ifdef DIAGNOSTIC
544 if (req != PRU_SEND && req != PRU_SENDOOB && control)
545 panic("rip_usrreq: unexpected control mbuf");
546 #endif
547 if (inp == 0 && req != PRU_ATTACH) {
548 error = EINVAL;
549 goto release;
550 }
551
552 switch (req) {
553
554 case PRU_ATTACH:
555 if (inp != 0) {
556 error = EISCONN;
557 break;
558 }
559 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
560 error = EACCES;
561 break;
562 }
563 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
564 error = soreserve(so, rip_sendspace, rip_recvspace);
565 if (error)
566 break;
567 }
568 error = in_pcballoc(so, &rawcbtable);
569 if (error)
570 break;
571 inp = sotoinpcb(so);
572 inp->inp_ip.ip_p = (long)nam;
573 break;
574
575 case PRU_DETACH:
576 #ifdef MROUTING
577 if (so == ip_mrouter)
578 ip_mrouter_done();
579 #endif
580 in_pcbdetach(inp);
581 break;
582
583 case PRU_BIND:
584 error = rip_bind(inp, nam);
585 break;
586
587 case PRU_LISTEN:
588 error = EOPNOTSUPP;
589 break;
590
591 case PRU_CONNECT:
592 error = rip_connect(inp, nam);
593 if (error)
594 break;
595 soisconnected(so);
596 break;
597
598 case PRU_CONNECT2:
599 error = EOPNOTSUPP;
600 break;
601
602 case PRU_DISCONNECT:
603 soisdisconnected(so);
604 rip_disconnect(inp);
605 break;
606
607 /*
608 * Mark the connection as being incapable of further input.
609 */
610 case PRU_SHUTDOWN:
611 socantsendmore(so);
612 break;
613
614 case PRU_RCVD:
615 error = EOPNOTSUPP;
616 break;
617
618 /*
619 * Ship a packet out. The appropriate raw output
620 * routine handles any massaging necessary.
621 */
622 case PRU_SEND:
623 if (control && control->m_len) {
624 m_freem(control);
625 m_freem(m);
626 error = EINVAL;
627 break;
628 }
629 {
630 if (nam) {
631 if ((so->so_state & SS_ISCONNECTED) != 0) {
632 error = EISCONN;
633 goto die;
634 }
635 error = rip_connect(inp, nam);
636 if (error) {
637 die:
638 m_freem(m);
639 break;
640 }
641 } else {
642 if ((so->so_state & SS_ISCONNECTED) == 0) {
643 error = ENOTCONN;
644 goto die;
645 }
646 }
647 error = rip_output(m, inp);
648 if (nam)
649 rip_disconnect(inp);
650 }
651 break;
652
653 case PRU_SENSE:
654 /*
655 * stat: don't bother with a blocksize.
656 */
657 splx(s);
658 return (0);
659
660 case PRU_RCVOOB:
661 error = EOPNOTSUPP;
662 break;
663
664 case PRU_SENDOOB:
665 m_freem(control);
666 m_freem(m);
667 error = EOPNOTSUPP;
668 break;
669
670 case PRU_SOCKADDR:
671 in_setsockaddr(inp, nam);
672 break;
673
674 case PRU_PEERADDR:
675 in_setpeeraddr(inp, nam);
676 break;
677
678 default:
679 panic("rip_usrreq");
680 }
681
682 release:
683 splx(s);
684 return (error);
685 }
686