raw_ip.c revision 1.70.2.2 1 /* $NetBSD: raw_ip.c,v 1.70.2.2 2004/08/03 10:54:43 skrll Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.70.2.2 2004/08/03 10:54:43 skrll Exp $");
65
66 #include "opt_inet.h"
67 #include "opt_ipsec.h"
68 #include "opt_mrouting.h"
69
70 #include <sys/param.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/protosw.h>
75 #include <sys/socketvar.h>
76 #include <sys/errno.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/ip_mroute.h>
88 #include <netinet/ip_icmp.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_var.h>
91
92 #include <machine/stdarg.h>
93
94 #ifdef IPSEC
95 #include <netinet6/ipsec.h>
96 #endif /*IPSEC*/
97
98 #ifdef FAST_IPSEC
99 #include <netipsec/ipsec.h>
100 #include <netipsec/ipsec_var.h> /* XXX ipsecstat namespace */
101 #endif /* FAST_IPSEC*/
102
103 struct inpcbtable rawcbtable;
104
105 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
106 struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
107 int rip_bind __P((struct inpcb *, struct mbuf *));
108 int rip_connect __P((struct inpcb *, struct mbuf *));
109 void rip_disconnect __P((struct inpcb *));
110
111 /*
112 * Nominal space allocated to a raw ip socket.
113 */
114 #define RIPSNDQ 8192
115 #define RIPRCVQ 8192
116
117 /*
118 * Raw interface to IP protocol.
119 */
120
121 /*
122 * Initialize raw connection block q.
123 */
124 void
125 rip_init()
126 {
127
128 in_pcbinit(&rawcbtable, 1, 1);
129 }
130
131 /*
132 * Setup generic address and protocol structures
133 * for raw_input routine, then pass them along with
134 * mbuf chain.
135 */
136 void
137 rip_input(struct mbuf *m, ...)
138 {
139 int proto;
140 struct ip *ip = mtod(m, struct ip *);
141 struct inpcb_hdr *inph;
142 struct inpcb *inp;
143 struct inpcb *last = 0;
144 struct mbuf *opts = 0;
145 struct sockaddr_in ripsrc;
146 va_list ap;
147
148 va_start(ap, m);
149 (void)va_arg(ap, int); /* ignore value, advance ap */
150 proto = va_arg(ap, int);
151 va_end(ap);
152
153 ripsrc.sin_family = AF_INET;
154 ripsrc.sin_len = sizeof(struct sockaddr_in);
155 ripsrc.sin_addr = ip->ip_src;
156 ripsrc.sin_port = 0;
157 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
158
159 /*
160 * XXX Compatibility: programs using raw IP expect ip_len
161 * XXX to have the header length subtracted, and in host order.
162 * XXX ip_off is also expected to be host order.
163 */
164 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
165 NTOHS(ip->ip_off);
166
167 CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) {
168 inp = (struct inpcb *)inph;
169 if (inp->inp_af != AF_INET)
170 continue;
171 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
172 continue;
173 if (!in_nullhost(inp->inp_laddr) &&
174 !in_hosteq(inp->inp_laddr, ip->ip_dst))
175 continue;
176 if (!in_nullhost(inp->inp_faddr) &&
177 !in_hosteq(inp->inp_faddr, ip->ip_src))
178 continue;
179 if (last) {
180 struct mbuf *n;
181
182 #if defined(IPSEC) || defined(FAST_IPSEC)
183 /* check AH/ESP integrity. */
184 if (ipsec4_in_reject_so(m, last->inp_socket)) {
185 ipsecstat.in_polvio++;
186 /* do not inject data to pcb */
187 } else
188 #endif /*IPSEC*/
189 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
190 if (last->inp_flags & INP_CONTROLOPTS ||
191 last->inp_socket->so_options & SO_TIMESTAMP)
192 ip_savecontrol(last, &opts, ip, n);
193 if (sbappendaddr(&last->inp_socket->so_rcv,
194 sintosa(&ripsrc), n, opts) == 0) {
195 /* should notify about lost packet */
196 m_freem(n);
197 if (opts)
198 m_freem(opts);
199 } else
200 sorwakeup(last->inp_socket);
201 opts = NULL;
202 }
203 }
204 last = inp;
205 }
206 #if defined(IPSEC) || defined(FAST_IPSEC)
207 /* check AH/ESP integrity. */
208 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
209 m_freem(m);
210 ipsecstat.in_polvio++;
211 ipstat.ips_delivered--;
212 /* do not inject data to pcb */
213 } else
214 #endif /*IPSEC*/
215 if (last) {
216 if (last->inp_flags & INP_CONTROLOPTS ||
217 last->inp_socket->so_options & SO_TIMESTAMP)
218 ip_savecontrol(last, &opts, ip, m);
219 if (sbappendaddr(&last->inp_socket->so_rcv,
220 sintosa(&ripsrc), m, opts) == 0) {
221 m_freem(m);
222 if (opts)
223 m_freem(opts);
224 } else
225 sorwakeup(last->inp_socket);
226 } else {
227 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
228 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
229 0, 0);
230 ipstat.ips_noproto++;
231 ipstat.ips_delivered--;
232 } else
233 m_freem(m);
234 }
235 return;
236 }
237
238 int
239 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
240 struct inpcbtable *table;
241 struct in_addr faddr, laddr;
242 int proto;
243 int errno;
244 void (*notify) __P((struct inpcb *, int));
245 {
246 struct inpcb *inp, *ninp;
247 int nmatch;
248
249 nmatch = 0;
250 for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue);
251 inp != (struct inpcb *)&table->inpt_queue;
252 inp = ninp) {
253 ninp = (struct inpcb *)inp->inp_queue.cqe_next;
254 if (inp->inp_af != AF_INET)
255 continue;
256 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
257 continue;
258 if (in_hosteq(inp->inp_faddr, faddr) &&
259 in_hosteq(inp->inp_laddr, laddr)) {
260 (*notify)(inp, errno);
261 nmatch++;
262 }
263 }
264
265 return nmatch;
266 }
267
268 void *
269 rip_ctlinput(cmd, sa, v)
270 int cmd;
271 struct sockaddr *sa;
272 void *v;
273 {
274 struct ip *ip = v;
275 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
276 int errno;
277
278 if (sa->sa_family != AF_INET ||
279 sa->sa_len != sizeof(struct sockaddr_in))
280 return NULL;
281 if ((unsigned)cmd >= PRC_NCMDS)
282 return NULL;
283 errno = inetctlerrmap[cmd];
284 if (PRC_IS_REDIRECT(cmd))
285 notify = in_rtchange, ip = 0;
286 else if (cmd == PRC_HOSTDEAD)
287 ip = 0;
288 else if (errno == 0)
289 return NULL;
290 if (ip) {
291 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
292 ip->ip_src, ip->ip_p, errno, notify);
293
294 /* XXX mapped address case */
295 } else
296 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
297 notify);
298 return NULL;
299 }
300
301 /*
302 * Generate IP header and pass packet to ip_output.
303 * Tack on options user may have setup with control call.
304 */
305 int
306 rip_output(struct mbuf *m, ...)
307 {
308 struct inpcb *inp;
309 struct ip *ip;
310 struct mbuf *opts;
311 int flags;
312 va_list ap;
313
314 va_start(ap, m);
315 inp = va_arg(ap, struct inpcb *);
316 va_end(ap);
317
318 flags =
319 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
320 | IP_RETURNMTU;
321
322 /*
323 * If the user handed us a complete IP packet, use it.
324 * Otherwise, allocate an mbuf for a header and fill it in.
325 */
326 if ((inp->inp_flags & INP_HDRINCL) == 0) {
327 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
328 m_freem(m);
329 return (EMSGSIZE);
330 }
331 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
332 if (!m)
333 return (ENOBUFS);
334 ip = mtod(m, struct ip *);
335 ip->ip_tos = 0;
336 ip->ip_off = htons(0);
337 ip->ip_p = inp->inp_ip.ip_p;
338 ip->ip_len = htons(m->m_pkthdr.len);
339 ip->ip_src = inp->inp_laddr;
340 ip->ip_dst = inp->inp_faddr;
341 ip->ip_ttl = MAXTTL;
342 opts = inp->inp_options;
343 } else {
344 if (m->m_pkthdr.len > IP_MAXPACKET) {
345 m_freem(m);
346 return (EMSGSIZE);
347 }
348 ip = mtod(m, struct ip *);
349
350 /*
351 * If the mbuf is read-only, we need to allocate
352 * a new mbuf for the header, since we need to
353 * modify the header.
354 */
355 if (M_READONLY(m)) {
356 int hlen = ip->ip_hl << 2;
357
358 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3);
359 if (m == NULL)
360 return (ENOMEM); /* XXX */
361 ip = mtod(m, struct ip *);
362 }
363
364 /* XXX userland passes ip_len and ip_off in host order */
365 if (m->m_pkthdr.len != ip->ip_len) {
366 m_freem(m);
367 return (EINVAL);
368 }
369 HTONS(ip->ip_len);
370 HTONS(ip->ip_off);
371 if (ip->ip_id == 0)
372 ip->ip_id = ip_newid();
373 opts = NULL;
374 /* XXX prevent ip_output from overwriting header fields */
375 flags |= IP_RAWOUTPUT;
376 ipstat.ips_rawout++;
377 }
378 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
379 inp->inp_socket, &inp->inp_errormtu));
380 }
381
382 /*
383 * Raw IP socket option processing.
384 */
385 int
386 rip_ctloutput(op, so, level, optname, m)
387 int op;
388 struct socket *so;
389 int level, optname;
390 struct mbuf **m;
391 {
392 struct inpcb *inp = sotoinpcb(so);
393 int error = 0;
394
395 if (level != IPPROTO_IP) {
396 error = ENOPROTOOPT;
397 if (op == PRCO_SETOPT && *m != 0)
398 (void) m_free(*m);
399 } else switch (op) {
400
401 case PRCO_SETOPT:
402 switch (optname) {
403 case IP_HDRINCL:
404 if (*m == 0 || (*m)->m_len < sizeof (int))
405 error = EINVAL;
406 else {
407 if (*mtod(*m, int *))
408 inp->inp_flags |= INP_HDRINCL;
409 else
410 inp->inp_flags &= ~INP_HDRINCL;
411 }
412 if (*m != 0)
413 (void) m_free(*m);
414 break;
415
416 #ifdef MROUTING
417 case MRT_INIT:
418 case MRT_DONE:
419 case MRT_ADD_VIF:
420 case MRT_DEL_VIF:
421 case MRT_ADD_MFC:
422 case MRT_DEL_MFC:
423 case MRT_ASSERT:
424 error = ip_mrouter_set(so, optname, m);
425 break;
426 #endif
427
428 default:
429 error = ip_ctloutput(op, so, level, optname, m);
430 break;
431 }
432 break;
433
434 case PRCO_GETOPT:
435 switch (optname) {
436 case IP_HDRINCL:
437 *m = m_get(M_WAIT, MT_SOOPTS);
438 MCLAIM((*m), so->so_mowner);
439 (*m)->m_len = sizeof (int);
440 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
441 break;
442
443 #ifdef MROUTING
444 case MRT_VERSION:
445 case MRT_ASSERT:
446 error = ip_mrouter_get(so, optname, m);
447 break;
448 #endif
449
450 default:
451 error = ip_ctloutput(op, so, level, optname, m);
452 break;
453 }
454 break;
455 }
456 return (error);
457 }
458
459 int
460 rip_bind(inp, nam)
461 struct inpcb *inp;
462 struct mbuf *nam;
463 {
464 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
465
466 if (nam->m_len != sizeof(*addr))
467 return (EINVAL);
468 if (TAILQ_FIRST(&ifnet) == 0)
469 return (EADDRNOTAVAIL);
470 if (addr->sin_family != AF_INET &&
471 addr->sin_family != AF_IMPLINK)
472 return (EAFNOSUPPORT);
473 if (!in_nullhost(addr->sin_addr) &&
474 ifa_ifwithaddr(sintosa(addr)) == 0)
475 return (EADDRNOTAVAIL);
476 inp->inp_laddr = addr->sin_addr;
477 return (0);
478 }
479
480 int
481 rip_connect(inp, nam)
482 struct inpcb *inp;
483 struct mbuf *nam;
484 {
485 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
486
487 if (nam->m_len != sizeof(*addr))
488 return (EINVAL);
489 if (TAILQ_FIRST(&ifnet) == 0)
490 return (EADDRNOTAVAIL);
491 if (addr->sin_family != AF_INET &&
492 addr->sin_family != AF_IMPLINK)
493 return (EAFNOSUPPORT);
494 inp->inp_faddr = addr->sin_addr;
495 return (0);
496 }
497
498 void
499 rip_disconnect(inp)
500 struct inpcb *inp;
501 {
502
503 inp->inp_faddr = zeroin_addr;
504 }
505
506 u_long rip_sendspace = RIPSNDQ;
507 u_long rip_recvspace = RIPRCVQ;
508
509 /*ARGSUSED*/
510 int
511 rip_usrreq(so, req, m, nam, control, l)
512 struct socket *so;
513 int req;
514 struct mbuf *m, *nam, *control;
515 struct lwp *l;
516 {
517 struct inpcb *inp;
518 struct proc *p;
519 int s;
520 int error = 0;
521 #ifdef MROUTING
522 extern struct socket *ip_mrouter;
523 #endif
524
525 p = l ? l->l_proc : NULL;
526 if (req == PRU_CONTROL)
527 return (in_control(so, (long)m, (caddr_t)nam,
528 (struct ifnet *)control, p));
529
530 if (req == PRU_PURGEIF) {
531 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
532 in_purgeif((struct ifnet *)control);
533 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
534 return (0);
535 }
536
537 s = splsoftnet();
538 inp = sotoinpcb(so);
539 #ifdef DIAGNOSTIC
540 if (req != PRU_SEND && req != PRU_SENDOOB && control)
541 panic("rip_usrreq: unexpected control mbuf");
542 #endif
543 if (inp == 0 && req != PRU_ATTACH) {
544 error = EINVAL;
545 goto release;
546 }
547
548 switch (req) {
549
550 case PRU_ATTACH:
551 if (inp != 0) {
552 error = EISCONN;
553 break;
554 }
555 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
556 error = EACCES;
557 break;
558 }
559 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
560 error = soreserve(so, rip_sendspace, rip_recvspace);
561 if (error)
562 break;
563 }
564 error = in_pcballoc(so, &rawcbtable);
565 if (error)
566 break;
567 inp = sotoinpcb(so);
568 inp->inp_ip.ip_p = (long)nam;
569 break;
570
571 case PRU_DETACH:
572 #ifdef MROUTING
573 if (so == ip_mrouter)
574 ip_mrouter_done();
575 #endif
576 in_pcbdetach(inp);
577 break;
578
579 case PRU_BIND:
580 error = rip_bind(inp, nam);
581 break;
582
583 case PRU_LISTEN:
584 error = EOPNOTSUPP;
585 break;
586
587 case PRU_CONNECT:
588 error = rip_connect(inp, nam);
589 if (error)
590 break;
591 soisconnected(so);
592 break;
593
594 case PRU_CONNECT2:
595 error = EOPNOTSUPP;
596 break;
597
598 case PRU_DISCONNECT:
599 soisdisconnected(so);
600 rip_disconnect(inp);
601 break;
602
603 /*
604 * Mark the connection as being incapable of further input.
605 */
606 case PRU_SHUTDOWN:
607 socantsendmore(so);
608 break;
609
610 case PRU_RCVD:
611 error = EOPNOTSUPP;
612 break;
613
614 /*
615 * Ship a packet out. The appropriate raw output
616 * routine handles any massaging necessary.
617 */
618 case PRU_SEND:
619 if (control && control->m_len) {
620 m_freem(control);
621 m_freem(m);
622 error = EINVAL;
623 break;
624 }
625 {
626 if (nam) {
627 if ((so->so_state & SS_ISCONNECTED) != 0) {
628 error = EISCONN;
629 goto die;
630 }
631 error = rip_connect(inp, nam);
632 if (error) {
633 die:
634 m_freem(m);
635 break;
636 }
637 } else {
638 if ((so->so_state & SS_ISCONNECTED) == 0) {
639 error = ENOTCONN;
640 goto die;
641 }
642 }
643 error = rip_output(m, inp);
644 if (nam)
645 rip_disconnect(inp);
646 }
647 break;
648
649 case PRU_SENSE:
650 /*
651 * stat: don't bother with a blocksize.
652 */
653 splx(s);
654 return (0);
655
656 case PRU_RCVOOB:
657 error = EOPNOTSUPP;
658 break;
659
660 case PRU_SENDOOB:
661 m_freem(control);
662 m_freem(m);
663 error = EOPNOTSUPP;
664 break;
665
666 case PRU_SOCKADDR:
667 in_setsockaddr(inp, nam);
668 break;
669
670 case PRU_PEERADDR:
671 in_setpeeraddr(inp, nam);
672 break;
673
674 default:
675 panic("rip_usrreq");
676 }
677
678 release:
679 splx(s);
680 return (error);
681 }
682