raw_ip.c revision 1.62 1 /* $NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.62 2002/08/14 00:23:33 itojun Exp $");
69
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94
95 #include <machine/stdarg.h>
96
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100
101 struct inpcbtable rawcbtable;
102
103 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
104 struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
105 int rip_bind __P((struct inpcb *, struct mbuf *));
106 int rip_connect __P((struct inpcb *, struct mbuf *));
107 void rip_disconnect __P((struct inpcb *));
108
109 /*
110 * Nominal space allocated to a raw ip socket.
111 */
112 #define RIPSNDQ 8192
113 #define RIPRCVQ 8192
114
115 /*
116 * Raw interface to IP protocol.
117 */
118
119 /*
120 * Initialize raw connection block q.
121 */
122 void
123 rip_init()
124 {
125
126 in_pcbinit(&rawcbtable, 1, 1);
127 }
128
129 static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
130
131 /*
132 * Setup generic address and protocol structures
133 * for raw_input routine, then pass them along with
134 * mbuf chain.
135 */
136 void
137 #if __STDC__
138 rip_input(struct mbuf *m, ...)
139 #else
140 rip_input(m, va_alist)
141 struct mbuf *m;
142 va_dcl
143 #endif
144 {
145 int off, proto;
146 struct ip *ip = mtod(m, struct ip *);
147 struct inpcb *inp;
148 struct inpcb *last = 0;
149 struct mbuf *opts = 0;
150 struct sockaddr_in ripsrc;
151 va_list ap;
152
153 va_start(ap, m);
154 off = va_arg(ap, int);
155 proto = va_arg(ap, int);
156 va_end(ap);
157
158 ripsrc.sin_family = AF_INET;
159 ripsrc.sin_len = sizeof(struct sockaddr_in);
160 ripsrc.sin_addr = ip->ip_src;
161 ripsrc.sin_port = 0;
162 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
163
164 /*
165 * XXX Compatibility: programs using raw IP expect ip_len
166 * XXX to have the header length subtracted, and in host order.
167 * XXX ip_off is also expected to be host order.
168 */
169 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
170 NTOHS(ip->ip_off);
171
172 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
173 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
174 continue;
175 if (!in_nullhost(inp->inp_laddr) &&
176 !in_hosteq(inp->inp_laddr, ip->ip_dst))
177 continue;
178 if (!in_nullhost(inp->inp_faddr) &&
179 !in_hosteq(inp->inp_faddr, ip->ip_src))
180 continue;
181 if (last) {
182 struct mbuf *n;
183
184 #ifdef IPSEC
185 /* check AH/ESP integrity. */
186 if (ipsec4_in_reject_so(m, last->inp_socket)) {
187 ipsecstat.in_polvio++;
188 /* do not inject data to pcb */
189 } else
190 #endif /*IPSEC*/
191 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
192 if (last->inp_flags & INP_CONTROLOPTS ||
193 last->inp_socket->so_options & SO_TIMESTAMP)
194 ip_savecontrol(last, &opts, ip, n);
195 if (sbappendaddr(&last->inp_socket->so_rcv,
196 sintosa(&ripsrc), n, opts) == 0) {
197 /* should notify about lost packet */
198 m_freem(n);
199 if (opts)
200 m_freem(opts);
201 } else
202 sorwakeup(last->inp_socket);
203 opts = NULL;
204 }
205 }
206 last = inp;
207 }
208 #ifdef IPSEC
209 /* check AH/ESP integrity. */
210 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
211 m_freem(m);
212 ipsecstat.in_polvio++;
213 ipstat.ips_delivered--;
214 /* do not inject data to pcb */
215 } else
216 #endif /*IPSEC*/
217 if (last) {
218 if (last->inp_flags & INP_CONTROLOPTS ||
219 last->inp_socket->so_options & SO_TIMESTAMP)
220 ip_savecontrol(last, &opts, ip, m);
221 if (sbappendaddr(&last->inp_socket->so_rcv,
222 sintosa(&ripsrc), m, opts) == 0) {
223 m_freem(m);
224 if (opts)
225 m_freem(opts);
226 } else
227 sorwakeup(last->inp_socket);
228 } else {
229 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
230 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
231 0, 0);
232 ipstat.ips_noproto++;
233 ipstat.ips_delivered--;
234 } else
235 m_freem(m);
236 }
237 return;
238 }
239
240 int
241 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
242 struct inpcbtable *table;
243 struct in_addr faddr, laddr;
244 int proto;
245 int errno;
246 void (*notify) __P((struct inpcb *, int));
247 {
248 struct inpcb *inp, *ninp;
249 int nmatch;
250
251 nmatch = 0;
252 for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
253 inp != (struct inpcb *)&table->inpt_queue;
254 inp = ninp) {
255 ninp = inp->inp_queue.cqe_next;
256 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
257 continue;
258 if (in_hosteq(inp->inp_faddr, faddr) &&
259 in_hosteq(inp->inp_laddr, laddr)) {
260 (*notify)(inp, errno);
261 nmatch++;
262 }
263 }
264
265 return nmatch;
266 }
267
268 void *
269 rip_ctlinput(cmd, sa, v)
270 int cmd;
271 struct sockaddr *sa;
272 void *v;
273 {
274 struct ip *ip = v;
275 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
276 int errno;
277
278 if (sa->sa_family != AF_INET ||
279 sa->sa_len != sizeof(struct sockaddr_in))
280 return NULL;
281 if ((unsigned)cmd >= PRC_NCMDS)
282 return NULL;
283 errno = inetctlerrmap[cmd];
284 if (PRC_IS_REDIRECT(cmd))
285 notify = in_rtchange, ip = 0;
286 else if (cmd == PRC_HOSTDEAD)
287 ip = 0;
288 else if (errno == 0)
289 return NULL;
290 if (ip) {
291 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
292 ip->ip_src, ip->ip_p, errno, notify);
293
294 /* XXX mapped address case */
295 } else
296 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
297 notify);
298 return NULL;
299 }
300
301 /*
302 * Generate IP header and pass packet to ip_output.
303 * Tack on options user may have setup with control call.
304 */
305 int
306 #if __STDC__
307 rip_output(struct mbuf *m, ...)
308 #else
309 rip_output(m, va_alist)
310 struct mbuf *m;
311 va_dcl
312 #endif
313 {
314 struct inpcb *inp;
315 struct ip *ip;
316 struct mbuf *opts;
317 int flags;
318 va_list ap;
319
320 va_start(ap, m);
321 inp = va_arg(ap, struct inpcb *);
322 va_end(ap);
323
324 flags =
325 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
326 | IP_RETURNMTU;
327
328 /*
329 * If the user handed us a complete IP packet, use it.
330 * Otherwise, allocate an mbuf for a header and fill it in.
331 */
332 if ((inp->inp_flags & INP_HDRINCL) == 0) {
333 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
334 m_freem(m);
335 return (EMSGSIZE);
336 }
337 M_PREPEND(m, sizeof(struct ip), M_WAIT);
338 ip = mtod(m, struct ip *);
339 ip->ip_tos = 0;
340 ip->ip_off = htons(0);
341 ip->ip_p = inp->inp_ip.ip_p;
342 ip->ip_len = htons(m->m_pkthdr.len);
343 ip->ip_src = inp->inp_laddr;
344 ip->ip_dst = inp->inp_faddr;
345 ip->ip_ttl = MAXTTL;
346 opts = inp->inp_options;
347 } else {
348 if (m->m_pkthdr.len > IP_MAXPACKET) {
349 m_freem(m);
350 return (EMSGSIZE);
351 }
352 ip = mtod(m, struct ip *);
353 /* XXX userland passes ip_len and ip_off in host order */
354 if (m->m_pkthdr.len != ip->ip_len) {
355 m_freem(m);
356 return (EINVAL);
357 }
358 HTONS(ip->ip_len);
359 HTONS(ip->ip_off);
360 if (ip->ip_id == 0)
361 ip->ip_id = htons(ip_id++);
362 opts = NULL;
363 /* XXX prevent ip_output from overwriting header fields */
364 flags |= IP_RAWOUTPUT;
365 ipstat.ips_rawout++;
366 }
367 #ifdef IPSEC
368 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
369 m_freem(m);
370 return ENOBUFS;
371 }
372 #endif /*IPSEC*/
373 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
374 &inp->inp_errormtu));
375 }
376
377 /*
378 * Raw IP socket option processing.
379 */
380 int
381 rip_ctloutput(op, so, level, optname, m)
382 int op;
383 struct socket *so;
384 int level, optname;
385 struct mbuf **m;
386 {
387 struct inpcb *inp = sotoinpcb(so);
388 int error = 0;
389
390 if (level != IPPROTO_IP) {
391 error = ENOPROTOOPT;
392 if (op == PRCO_SETOPT && *m != 0)
393 (void) m_free(*m);
394 } else switch (op) {
395
396 case PRCO_SETOPT:
397 switch (optname) {
398 case IP_HDRINCL:
399 if (*m == 0 || (*m)->m_len < sizeof (int))
400 error = EINVAL;
401 else {
402 if (*mtod(*m, int *))
403 inp->inp_flags |= INP_HDRINCL;
404 else
405 inp->inp_flags &= ~INP_HDRINCL;
406 }
407 if (*m != 0)
408 (void) m_free(*m);
409 break;
410
411 #ifdef MROUTING
412 case MRT_INIT:
413 case MRT_DONE:
414 case MRT_ADD_VIF:
415 case MRT_DEL_VIF:
416 case MRT_ADD_MFC:
417 case MRT_DEL_MFC:
418 case MRT_ASSERT:
419 error = ip_mrouter_set(so, optname, m);
420 break;
421 #endif
422
423 default:
424 error = ip_ctloutput(op, so, level, optname, m);
425 break;
426 }
427 break;
428
429 case PRCO_GETOPT:
430 switch (optname) {
431 case IP_HDRINCL:
432 *m = m_get(M_WAIT, M_SOOPTS);
433 (*m)->m_len = sizeof (int);
434 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
435 break;
436
437 #ifdef MROUTING
438 case MRT_VERSION:
439 case MRT_ASSERT:
440 error = ip_mrouter_get(so, optname, m);
441 break;
442 #endif
443
444 default:
445 error = ip_ctloutput(op, so, level, optname, m);
446 break;
447 }
448 break;
449 }
450 return (error);
451 }
452
453 int
454 rip_bind(inp, nam)
455 struct inpcb *inp;
456 struct mbuf *nam;
457 {
458 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
459
460 if (nam->m_len != sizeof(*addr))
461 return (EINVAL);
462 if (TAILQ_FIRST(&ifnet) == 0)
463 return (EADDRNOTAVAIL);
464 if (addr->sin_family != AF_INET &&
465 addr->sin_family != AF_IMPLINK)
466 return (EAFNOSUPPORT);
467 if (!in_nullhost(addr->sin_addr) &&
468 ifa_ifwithaddr(sintosa(addr)) == 0)
469 return (EADDRNOTAVAIL);
470 inp->inp_laddr = addr->sin_addr;
471 return (0);
472 }
473
474 int
475 rip_connect(inp, nam)
476 struct inpcb *inp;
477 struct mbuf *nam;
478 {
479 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
480
481 if (nam->m_len != sizeof(*addr))
482 return (EINVAL);
483 if (TAILQ_FIRST(&ifnet) == 0)
484 return (EADDRNOTAVAIL);
485 if (addr->sin_family != AF_INET &&
486 addr->sin_family != AF_IMPLINK)
487 return (EAFNOSUPPORT);
488 inp->inp_faddr = addr->sin_addr;
489 return (0);
490 }
491
492 void
493 rip_disconnect(inp)
494 struct inpcb *inp;
495 {
496
497 inp->inp_faddr = zeroin_addr;
498 }
499
500 u_long rip_sendspace = RIPSNDQ;
501 u_long rip_recvspace = RIPRCVQ;
502
503 /*ARGSUSED*/
504 int
505 rip_usrreq(so, req, m, nam, control, p)
506 struct socket *so;
507 int req;
508 struct mbuf *m, *nam, *control;
509 struct proc *p;
510 {
511 struct inpcb *inp;
512 int s;
513 int error = 0;
514 #ifdef MROUTING
515 extern struct socket *ip_mrouter;
516 #endif
517
518 if (req == PRU_CONTROL)
519 return (in_control(so, (long)m, (caddr_t)nam,
520 (struct ifnet *)control, p));
521
522 if (req == PRU_PURGEIF) {
523 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
524 in_purgeif((struct ifnet *)control);
525 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
526 return (0);
527 }
528
529 s = splsoftnet();
530 inp = sotoinpcb(so);
531 #ifdef DIAGNOSTIC
532 if (req != PRU_SEND && req != PRU_SENDOOB && control)
533 panic("rip_usrreq: unexpected control mbuf");
534 #endif
535 if (inp == 0 && req != PRU_ATTACH) {
536 error = EINVAL;
537 goto release;
538 }
539
540 switch (req) {
541
542 case PRU_ATTACH:
543 if (inp != 0) {
544 error = EISCONN;
545 break;
546 }
547 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
548 error = EACCES;
549 break;
550 }
551 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
552 error = soreserve(so, rip_sendspace, rip_recvspace);
553 if (error)
554 break;
555 }
556 error = in_pcballoc(so, &rawcbtable);
557 if (error)
558 break;
559 inp = sotoinpcb(so);
560 inp->inp_ip.ip_p = (long)nam;
561 break;
562
563 case PRU_DETACH:
564 #ifdef MROUTING
565 if (so == ip_mrouter)
566 ip_mrouter_done();
567 #endif
568 in_pcbdetach(inp);
569 break;
570
571 case PRU_BIND:
572 error = rip_bind(inp, nam);
573 break;
574
575 case PRU_LISTEN:
576 error = EOPNOTSUPP;
577 break;
578
579 case PRU_CONNECT:
580 error = rip_connect(inp, nam);
581 if (error)
582 break;
583 soisconnected(so);
584 break;
585
586 case PRU_CONNECT2:
587 error = EOPNOTSUPP;
588 break;
589
590 case PRU_DISCONNECT:
591 soisdisconnected(so);
592 rip_disconnect(inp);
593 break;
594
595 /*
596 * Mark the connection as being incapable of further input.
597 */
598 case PRU_SHUTDOWN:
599 socantsendmore(so);
600 break;
601
602 case PRU_RCVD:
603 error = EOPNOTSUPP;
604 break;
605
606 /*
607 * Ship a packet out. The appropriate raw output
608 * routine handles any massaging necessary.
609 */
610 case PRU_SEND:
611 if (control && control->m_len) {
612 m_freem(control);
613 m_freem(m);
614 error = EINVAL;
615 break;
616 }
617 {
618 if (nam) {
619 if ((so->so_state & SS_ISCONNECTED) != 0) {
620 error = EISCONN;
621 goto die;
622 }
623 error = rip_connect(inp, nam);
624 if (error) {
625 die:
626 m_freem(m);
627 break;
628 }
629 } else {
630 if ((so->so_state & SS_ISCONNECTED) == 0) {
631 error = ENOTCONN;
632 goto die;
633 }
634 }
635 error = rip_output(m, inp);
636 if (nam)
637 rip_disconnect(inp);
638 }
639 break;
640
641 case PRU_SENSE:
642 /*
643 * stat: don't bother with a blocksize.
644 */
645 splx(s);
646 return (0);
647
648 case PRU_RCVOOB:
649 error = EOPNOTSUPP;
650 break;
651
652 case PRU_SENDOOB:
653 m_freem(control);
654 m_freem(m);
655 error = EOPNOTSUPP;
656 break;
657
658 case PRU_SOCKADDR:
659 in_setsockaddr(inp, nam);
660 break;
661
662 case PRU_PEERADDR:
663 in_setpeeraddr(inp, nam);
664 break;
665
666 default:
667 panic("rip_usrreq");
668 }
669
670 release:
671 splx(s);
672 return (error);
673 }
674