raw_ip.c revision 1.63 1 /* $NetBSD: raw_ip.c,v 1.63 2002/10/22 02:34:47 simonb Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.63 2002/10/22 02:34:47 simonb Exp $");
69
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94
95 #include <machine/stdarg.h>
96
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100
101 struct inpcbtable rawcbtable;
102
103 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
104 struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
105 int rip_bind __P((struct inpcb *, struct mbuf *));
106 int rip_connect __P((struct inpcb *, struct mbuf *));
107 void rip_disconnect __P((struct inpcb *));
108
109 /*
110 * Nominal space allocated to a raw ip socket.
111 */
112 #define RIPSNDQ 8192
113 #define RIPRCVQ 8192
114
115 /*
116 * Raw interface to IP protocol.
117 */
118
119 /*
120 * Initialize raw connection block q.
121 */
122 void
123 rip_init()
124 {
125
126 in_pcbinit(&rawcbtable, 1, 1);
127 }
128
129 /*
130 * Setup generic address and protocol structures
131 * for raw_input routine, then pass them along with
132 * mbuf chain.
133 */
134 void
135 #if __STDC__
136 rip_input(struct mbuf *m, ...)
137 #else
138 rip_input(m, va_alist)
139 struct mbuf *m;
140 va_dcl
141 #endif
142 {
143 int proto;
144 struct ip *ip = mtod(m, struct ip *);
145 struct inpcb *inp;
146 struct inpcb *last = 0;
147 struct mbuf *opts = 0;
148 struct sockaddr_in ripsrc;
149 va_list ap;
150
151 va_start(ap, m);
152 proto = va_arg(ap, int);
153 va_end(ap);
154
155 ripsrc.sin_family = AF_INET;
156 ripsrc.sin_len = sizeof(struct sockaddr_in);
157 ripsrc.sin_addr = ip->ip_src;
158 ripsrc.sin_port = 0;
159 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
160
161 /*
162 * XXX Compatibility: programs using raw IP expect ip_len
163 * XXX to have the header length subtracted, and in host order.
164 * XXX ip_off is also expected to be host order.
165 */
166 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
167 NTOHS(ip->ip_off);
168
169 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
170 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
171 continue;
172 if (!in_nullhost(inp->inp_laddr) &&
173 !in_hosteq(inp->inp_laddr, ip->ip_dst))
174 continue;
175 if (!in_nullhost(inp->inp_faddr) &&
176 !in_hosteq(inp->inp_faddr, ip->ip_src))
177 continue;
178 if (last) {
179 struct mbuf *n;
180
181 #ifdef IPSEC
182 /* check AH/ESP integrity. */
183 if (ipsec4_in_reject_so(m, last->inp_socket)) {
184 ipsecstat.in_polvio++;
185 /* do not inject data to pcb */
186 } else
187 #endif /*IPSEC*/
188 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
189 if (last->inp_flags & INP_CONTROLOPTS ||
190 last->inp_socket->so_options & SO_TIMESTAMP)
191 ip_savecontrol(last, &opts, ip, n);
192 if (sbappendaddr(&last->inp_socket->so_rcv,
193 sintosa(&ripsrc), n, opts) == 0) {
194 /* should notify about lost packet */
195 m_freem(n);
196 if (opts)
197 m_freem(opts);
198 } else
199 sorwakeup(last->inp_socket);
200 opts = NULL;
201 }
202 }
203 last = inp;
204 }
205 #ifdef IPSEC
206 /* check AH/ESP integrity. */
207 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
208 m_freem(m);
209 ipsecstat.in_polvio++;
210 ipstat.ips_delivered--;
211 /* do not inject data to pcb */
212 } else
213 #endif /*IPSEC*/
214 if (last) {
215 if (last->inp_flags & INP_CONTROLOPTS ||
216 last->inp_socket->so_options & SO_TIMESTAMP)
217 ip_savecontrol(last, &opts, ip, m);
218 if (sbappendaddr(&last->inp_socket->so_rcv,
219 sintosa(&ripsrc), m, opts) == 0) {
220 m_freem(m);
221 if (opts)
222 m_freem(opts);
223 } else
224 sorwakeup(last->inp_socket);
225 } else {
226 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
227 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
228 0, 0);
229 ipstat.ips_noproto++;
230 ipstat.ips_delivered--;
231 } else
232 m_freem(m);
233 }
234 return;
235 }
236
237 int
238 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
239 struct inpcbtable *table;
240 struct in_addr faddr, laddr;
241 int proto;
242 int errno;
243 void (*notify) __P((struct inpcb *, int));
244 {
245 struct inpcb *inp, *ninp;
246 int nmatch;
247
248 nmatch = 0;
249 for (inp = CIRCLEQ_FIRST(&table->inpt_queue);
250 inp != (struct inpcb *)&table->inpt_queue;
251 inp = ninp) {
252 ninp = inp->inp_queue.cqe_next;
253 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
254 continue;
255 if (in_hosteq(inp->inp_faddr, faddr) &&
256 in_hosteq(inp->inp_laddr, laddr)) {
257 (*notify)(inp, errno);
258 nmatch++;
259 }
260 }
261
262 return nmatch;
263 }
264
265 void *
266 rip_ctlinput(cmd, sa, v)
267 int cmd;
268 struct sockaddr *sa;
269 void *v;
270 {
271 struct ip *ip = v;
272 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
273 int errno;
274
275 if (sa->sa_family != AF_INET ||
276 sa->sa_len != sizeof(struct sockaddr_in))
277 return NULL;
278 if ((unsigned)cmd >= PRC_NCMDS)
279 return NULL;
280 errno = inetctlerrmap[cmd];
281 if (PRC_IS_REDIRECT(cmd))
282 notify = in_rtchange, ip = 0;
283 else if (cmd == PRC_HOSTDEAD)
284 ip = 0;
285 else if (errno == 0)
286 return NULL;
287 if (ip) {
288 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
289 ip->ip_src, ip->ip_p, errno, notify);
290
291 /* XXX mapped address case */
292 } else
293 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
294 notify);
295 return NULL;
296 }
297
298 /*
299 * Generate IP header and pass packet to ip_output.
300 * Tack on options user may have setup with control call.
301 */
302 int
303 #if __STDC__
304 rip_output(struct mbuf *m, ...)
305 #else
306 rip_output(m, va_alist)
307 struct mbuf *m;
308 va_dcl
309 #endif
310 {
311 struct inpcb *inp;
312 struct ip *ip;
313 struct mbuf *opts;
314 int flags;
315 va_list ap;
316
317 va_start(ap, m);
318 inp = va_arg(ap, struct inpcb *);
319 va_end(ap);
320
321 flags =
322 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
323 | IP_RETURNMTU;
324
325 /*
326 * If the user handed us a complete IP packet, use it.
327 * Otherwise, allocate an mbuf for a header and fill it in.
328 */
329 if ((inp->inp_flags & INP_HDRINCL) == 0) {
330 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
331 m_freem(m);
332 return (EMSGSIZE);
333 }
334 M_PREPEND(m, sizeof(struct ip), M_WAIT);
335 ip = mtod(m, struct ip *);
336 ip->ip_tos = 0;
337 ip->ip_off = htons(0);
338 ip->ip_p = inp->inp_ip.ip_p;
339 ip->ip_len = htons(m->m_pkthdr.len);
340 ip->ip_src = inp->inp_laddr;
341 ip->ip_dst = inp->inp_faddr;
342 ip->ip_ttl = MAXTTL;
343 opts = inp->inp_options;
344 } else {
345 if (m->m_pkthdr.len > IP_MAXPACKET) {
346 m_freem(m);
347 return (EMSGSIZE);
348 }
349 ip = mtod(m, struct ip *);
350 /* XXX userland passes ip_len and ip_off in host order */
351 if (m->m_pkthdr.len != ip->ip_len) {
352 m_freem(m);
353 return (EINVAL);
354 }
355 HTONS(ip->ip_len);
356 HTONS(ip->ip_off);
357 if (ip->ip_id == 0)
358 ip->ip_id = htons(ip_id++);
359 opts = NULL;
360 /* XXX prevent ip_output from overwriting header fields */
361 flags |= IP_RAWOUTPUT;
362 ipstat.ips_rawout++;
363 }
364 #ifdef IPSEC
365 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
366 m_freem(m);
367 return ENOBUFS;
368 }
369 #endif /*IPSEC*/
370 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
371 &inp->inp_errormtu));
372 }
373
374 /*
375 * Raw IP socket option processing.
376 */
377 int
378 rip_ctloutput(op, so, level, optname, m)
379 int op;
380 struct socket *so;
381 int level, optname;
382 struct mbuf **m;
383 {
384 struct inpcb *inp = sotoinpcb(so);
385 int error = 0;
386
387 if (level != IPPROTO_IP) {
388 error = ENOPROTOOPT;
389 if (op == PRCO_SETOPT && *m != 0)
390 (void) m_free(*m);
391 } else switch (op) {
392
393 case PRCO_SETOPT:
394 switch (optname) {
395 case IP_HDRINCL:
396 if (*m == 0 || (*m)->m_len < sizeof (int))
397 error = EINVAL;
398 else {
399 if (*mtod(*m, int *))
400 inp->inp_flags |= INP_HDRINCL;
401 else
402 inp->inp_flags &= ~INP_HDRINCL;
403 }
404 if (*m != 0)
405 (void) m_free(*m);
406 break;
407
408 #ifdef MROUTING
409 case MRT_INIT:
410 case MRT_DONE:
411 case MRT_ADD_VIF:
412 case MRT_DEL_VIF:
413 case MRT_ADD_MFC:
414 case MRT_DEL_MFC:
415 case MRT_ASSERT:
416 error = ip_mrouter_set(so, optname, m);
417 break;
418 #endif
419
420 default:
421 error = ip_ctloutput(op, so, level, optname, m);
422 break;
423 }
424 break;
425
426 case PRCO_GETOPT:
427 switch (optname) {
428 case IP_HDRINCL:
429 *m = m_get(M_WAIT, M_SOOPTS);
430 (*m)->m_len = sizeof (int);
431 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
432 break;
433
434 #ifdef MROUTING
435 case MRT_VERSION:
436 case MRT_ASSERT:
437 error = ip_mrouter_get(so, optname, m);
438 break;
439 #endif
440
441 default:
442 error = ip_ctloutput(op, so, level, optname, m);
443 break;
444 }
445 break;
446 }
447 return (error);
448 }
449
450 int
451 rip_bind(inp, nam)
452 struct inpcb *inp;
453 struct mbuf *nam;
454 {
455 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
456
457 if (nam->m_len != sizeof(*addr))
458 return (EINVAL);
459 if (TAILQ_FIRST(&ifnet) == 0)
460 return (EADDRNOTAVAIL);
461 if (addr->sin_family != AF_INET &&
462 addr->sin_family != AF_IMPLINK)
463 return (EAFNOSUPPORT);
464 if (!in_nullhost(addr->sin_addr) &&
465 ifa_ifwithaddr(sintosa(addr)) == 0)
466 return (EADDRNOTAVAIL);
467 inp->inp_laddr = addr->sin_addr;
468 return (0);
469 }
470
471 int
472 rip_connect(inp, nam)
473 struct inpcb *inp;
474 struct mbuf *nam;
475 {
476 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
477
478 if (nam->m_len != sizeof(*addr))
479 return (EINVAL);
480 if (TAILQ_FIRST(&ifnet) == 0)
481 return (EADDRNOTAVAIL);
482 if (addr->sin_family != AF_INET &&
483 addr->sin_family != AF_IMPLINK)
484 return (EAFNOSUPPORT);
485 inp->inp_faddr = addr->sin_addr;
486 return (0);
487 }
488
489 void
490 rip_disconnect(inp)
491 struct inpcb *inp;
492 {
493
494 inp->inp_faddr = zeroin_addr;
495 }
496
497 u_long rip_sendspace = RIPSNDQ;
498 u_long rip_recvspace = RIPRCVQ;
499
500 /*ARGSUSED*/
501 int
502 rip_usrreq(so, req, m, nam, control, p)
503 struct socket *so;
504 int req;
505 struct mbuf *m, *nam, *control;
506 struct proc *p;
507 {
508 struct inpcb *inp;
509 int s;
510 int error = 0;
511 #ifdef MROUTING
512 extern struct socket *ip_mrouter;
513 #endif
514
515 if (req == PRU_CONTROL)
516 return (in_control(so, (long)m, (caddr_t)nam,
517 (struct ifnet *)control, p));
518
519 if (req == PRU_PURGEIF) {
520 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
521 in_purgeif((struct ifnet *)control);
522 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
523 return (0);
524 }
525
526 s = splsoftnet();
527 inp = sotoinpcb(so);
528 #ifdef DIAGNOSTIC
529 if (req != PRU_SEND && req != PRU_SENDOOB && control)
530 panic("rip_usrreq: unexpected control mbuf");
531 #endif
532 if (inp == 0 && req != PRU_ATTACH) {
533 error = EINVAL;
534 goto release;
535 }
536
537 switch (req) {
538
539 case PRU_ATTACH:
540 if (inp != 0) {
541 error = EISCONN;
542 break;
543 }
544 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
545 error = EACCES;
546 break;
547 }
548 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
549 error = soreserve(so, rip_sendspace, rip_recvspace);
550 if (error)
551 break;
552 }
553 error = in_pcballoc(so, &rawcbtable);
554 if (error)
555 break;
556 inp = sotoinpcb(so);
557 inp->inp_ip.ip_p = (long)nam;
558 break;
559
560 case PRU_DETACH:
561 #ifdef MROUTING
562 if (so == ip_mrouter)
563 ip_mrouter_done();
564 #endif
565 in_pcbdetach(inp);
566 break;
567
568 case PRU_BIND:
569 error = rip_bind(inp, nam);
570 break;
571
572 case PRU_LISTEN:
573 error = EOPNOTSUPP;
574 break;
575
576 case PRU_CONNECT:
577 error = rip_connect(inp, nam);
578 if (error)
579 break;
580 soisconnected(so);
581 break;
582
583 case PRU_CONNECT2:
584 error = EOPNOTSUPP;
585 break;
586
587 case PRU_DISCONNECT:
588 soisdisconnected(so);
589 rip_disconnect(inp);
590 break;
591
592 /*
593 * Mark the connection as being incapable of further input.
594 */
595 case PRU_SHUTDOWN:
596 socantsendmore(so);
597 break;
598
599 case PRU_RCVD:
600 error = EOPNOTSUPP;
601 break;
602
603 /*
604 * Ship a packet out. The appropriate raw output
605 * routine handles any massaging necessary.
606 */
607 case PRU_SEND:
608 if (control && control->m_len) {
609 m_freem(control);
610 m_freem(m);
611 error = EINVAL;
612 break;
613 }
614 {
615 if (nam) {
616 if ((so->so_state & SS_ISCONNECTED) != 0) {
617 error = EISCONN;
618 goto die;
619 }
620 error = rip_connect(inp, nam);
621 if (error) {
622 die:
623 m_freem(m);
624 break;
625 }
626 } else {
627 if ((so->so_state & SS_ISCONNECTED) == 0) {
628 error = ENOTCONN;
629 goto die;
630 }
631 }
632 error = rip_output(m, inp);
633 if (nam)
634 rip_disconnect(inp);
635 }
636 break;
637
638 case PRU_SENSE:
639 /*
640 * stat: don't bother with a blocksize.
641 */
642 splx(s);
643 return (0);
644
645 case PRU_RCVOOB:
646 error = EOPNOTSUPP;
647 break;
648
649 case PRU_SENDOOB:
650 m_freem(control);
651 m_freem(m);
652 error = EOPNOTSUPP;
653 break;
654
655 case PRU_SOCKADDR:
656 in_setsockaddr(inp, nam);
657 break;
658
659 case PRU_PEERADDR:
660 in_setpeeraddr(inp, nam);
661 break;
662
663 default:
664 panic("rip_usrreq");
665 }
666
667 release:
668 splx(s);
669 return (error);
670 }
671