raw_ip.c revision 1.79 1 /* $NetBSD: raw_ip.c,v 1.79 2004/04/26 01:31:57 matt Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
61 */
62
63 #include <sys/cdefs.h>
64 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.79 2004/04/26 01:31:57 matt Exp $");
65
66 #include "opt_inet.h"
67 #include "opt_ipsec.h"
68 #include "opt_mrouting.h"
69
70 #include <sys/param.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/protosw.h>
75 #include <sys/socketvar.h>
76 #include <sys/errno.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/ip_mroute.h>
88 #include <netinet/ip_icmp.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_var.h>
91
92 #include <machine/stdarg.h>
93
94 #ifdef IPSEC
95 #include <netinet6/ipsec.h>
96 #endif /*IPSEC*/
97
98 #ifdef FAST_IPSEC
99 #include <netipsec/ipsec.h>
100 #endif /* FAST_IPSEC*/
101
102 struct inpcbtable rawcbtable;
103
104 int rip_pcbnotify __P((struct inpcbtable *, struct in_addr,
105 struct in_addr, int, int, void (*) __P((struct inpcb *, int))));
106 int rip_bind __P((struct inpcb *, struct mbuf *));
107 int rip_connect __P((struct inpcb *, struct mbuf *));
108 void rip_disconnect __P((struct inpcb *));
109
110 /*
111 * Nominal space allocated to a raw ip socket.
112 */
113 #define RIPSNDQ 8192
114 #define RIPRCVQ 8192
115
116 /*
117 * Raw interface to IP protocol.
118 */
119
120 /*
121 * Initialize raw connection block q.
122 */
123 void
124 rip_init()
125 {
126
127 in_pcbinit(&rawcbtable, 1, 1);
128 }
129
130 /*
131 * Setup generic address and protocol structures
132 * for raw_input routine, then pass them along with
133 * mbuf chain.
134 */
135 void
136 rip_input(struct mbuf *m, ...)
137 {
138 int proto;
139 struct ip *ip = mtod(m, struct ip *);
140 struct inpcb_hdr *inph;
141 struct inpcb *inp;
142 struct inpcb *last = 0;
143 struct mbuf *opts = 0;
144 struct sockaddr_in ripsrc;
145 va_list ap;
146
147 va_start(ap, m);
148 (void)va_arg(ap, int); /* ignore value, advance ap */
149 proto = va_arg(ap, int);
150 va_end(ap);
151
152 ripsrc.sin_family = AF_INET;
153 ripsrc.sin_len = sizeof(struct sockaddr_in);
154 ripsrc.sin_addr = ip->ip_src;
155 ripsrc.sin_port = 0;
156 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
157
158 /*
159 * XXX Compatibility: programs using raw IP expect ip_len
160 * XXX to have the header length subtracted, and in host order.
161 * XXX ip_off is also expected to be host order.
162 */
163 ip->ip_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
164 NTOHS(ip->ip_off);
165
166 CIRCLEQ_FOREACH(inph, &rawcbtable.inpt_queue, inph_queue) {
167 inp = (struct inpcb *)inph;
168 if (inp->inp_af != AF_INET)
169 continue;
170 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
171 continue;
172 if (!in_nullhost(inp->inp_laddr) &&
173 !in_hosteq(inp->inp_laddr, ip->ip_dst))
174 continue;
175 if (!in_nullhost(inp->inp_faddr) &&
176 !in_hosteq(inp->inp_faddr, ip->ip_src))
177 continue;
178 if (last) {
179 struct mbuf *n;
180
181 #if defined(IPSEC) || defined(FAST_IPSEC)
182 /* check AH/ESP integrity. */
183 if (ipsec4_in_reject_so(m, last->inp_socket)) {
184 ipsecstat.in_polvio++;
185 /* do not inject data to pcb */
186 } else
187 #endif /*IPSEC*/
188 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
189 if (last->inp_flags & INP_CONTROLOPTS ||
190 last->inp_socket->so_options & SO_TIMESTAMP)
191 ip_savecontrol(last, &opts, ip, n);
192 if (sbappendaddr(&last->inp_socket->so_rcv,
193 sintosa(&ripsrc), n, opts) == 0) {
194 /* should notify about lost packet */
195 m_freem(n);
196 if (opts)
197 m_freem(opts);
198 } else
199 sorwakeup(last->inp_socket);
200 opts = NULL;
201 }
202 }
203 last = inp;
204 }
205 #if defined(IPSEC) || defined(FAST_IPSEC)
206 /* check AH/ESP integrity. */
207 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
208 m_freem(m);
209 ipsecstat.in_polvio++;
210 ipstat.ips_delivered--;
211 /* do not inject data to pcb */
212 } else
213 #endif /*IPSEC*/
214 if (last) {
215 if (last->inp_flags & INP_CONTROLOPTS ||
216 last->inp_socket->so_options & SO_TIMESTAMP)
217 ip_savecontrol(last, &opts, ip, m);
218 if (sbappendaddr(&last->inp_socket->so_rcv,
219 sintosa(&ripsrc), m, opts) == 0) {
220 m_freem(m);
221 if (opts)
222 m_freem(opts);
223 } else
224 sorwakeup(last->inp_socket);
225 } else {
226 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
227 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
228 0, 0);
229 ipstat.ips_noproto++;
230 ipstat.ips_delivered--;
231 } else
232 m_freem(m);
233 }
234 return;
235 }
236
237 int
238 rip_pcbnotify(table, faddr, laddr, proto, errno, notify)
239 struct inpcbtable *table;
240 struct in_addr faddr, laddr;
241 int proto;
242 int errno;
243 void (*notify) __P((struct inpcb *, int));
244 {
245 struct inpcb *inp, *ninp;
246 int nmatch;
247
248 nmatch = 0;
249 for (inp = (struct inpcb *)CIRCLEQ_FIRST(&table->inpt_queue);
250 inp != (struct inpcb *)&table->inpt_queue;
251 inp = ninp) {
252 ninp = (struct inpcb *)inp->inp_queue.cqe_next;
253 if (inp->inp_af != AF_INET)
254 continue;
255 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
256 continue;
257 if (in_hosteq(inp->inp_faddr, faddr) &&
258 in_hosteq(inp->inp_laddr, laddr)) {
259 (*notify)(inp, errno);
260 nmatch++;
261 }
262 }
263
264 return nmatch;
265 }
266
267 void *
268 rip_ctlinput(cmd, sa, v)
269 int cmd;
270 struct sockaddr *sa;
271 void *v;
272 {
273 struct ip *ip = v;
274 void (*notify) __P((struct inpcb *, int)) = in_rtchange;
275 int errno;
276
277 if (sa->sa_family != AF_INET ||
278 sa->sa_len != sizeof(struct sockaddr_in))
279 return NULL;
280 if ((unsigned)cmd >= PRC_NCMDS)
281 return NULL;
282 errno = inetctlerrmap[cmd];
283 if (PRC_IS_REDIRECT(cmd))
284 notify = in_rtchange, ip = 0;
285 else if (cmd == PRC_HOSTDEAD)
286 ip = 0;
287 else if (errno == 0)
288 return NULL;
289 if (ip) {
290 rip_pcbnotify(&rawcbtable, satosin(sa)->sin_addr,
291 ip->ip_src, ip->ip_p, errno, notify);
292
293 /* XXX mapped address case */
294 } else
295 in_pcbnotifyall(&rawcbtable, satosin(sa)->sin_addr, errno,
296 notify);
297 return NULL;
298 }
299
300 /*
301 * Generate IP header and pass packet to ip_output.
302 * Tack on options user may have setup with control call.
303 */
304 int
305 rip_output(struct mbuf *m, ...)
306 {
307 struct inpcb *inp;
308 struct ip *ip;
309 struct mbuf *opts;
310 int flags;
311 va_list ap;
312
313 va_start(ap, m);
314 inp = va_arg(ap, struct inpcb *);
315 va_end(ap);
316
317 flags =
318 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
319 | IP_RETURNMTU;
320
321 /*
322 * If the user handed us a complete IP packet, use it.
323 * Otherwise, allocate an mbuf for a header and fill it in.
324 */
325 if ((inp->inp_flags & INP_HDRINCL) == 0) {
326 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
327 m_freem(m);
328 return (EMSGSIZE);
329 }
330 M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
331 if (!m)
332 return (ENOBUFS);
333 ip = mtod(m, struct ip *);
334 ip->ip_tos = 0;
335 ip->ip_off = htons(0);
336 ip->ip_p = inp->inp_ip.ip_p;
337 ip->ip_len = htons(m->m_pkthdr.len);
338 ip->ip_src = inp->inp_laddr;
339 ip->ip_dst = inp->inp_faddr;
340 ip->ip_ttl = MAXTTL;
341 opts = inp->inp_options;
342 } else {
343 if (m->m_pkthdr.len > IP_MAXPACKET) {
344 m_freem(m);
345 return (EMSGSIZE);
346 }
347 ip = mtod(m, struct ip *);
348
349 /*
350 * If the mbuf is read-only, we need to allocate
351 * a new mbuf for the header, since we need to
352 * modify the header.
353 */
354 if (M_READONLY(m)) {
355 int hlen = ip->ip_hl << 2;
356
357 m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3);
358 if (m == NULL)
359 return (ENOMEM); /* XXX */
360 ip = mtod(m, struct ip *);
361 }
362
363 /* XXX userland passes ip_len and ip_off in host order */
364 if (m->m_pkthdr.len != ip->ip_len) {
365 m_freem(m);
366 return (EINVAL);
367 }
368 HTONS(ip->ip_len);
369 HTONS(ip->ip_off);
370 if (ip->ip_id == 0)
371 ip->ip_id = ip_newid();
372 opts = NULL;
373 /* XXX prevent ip_output from overwriting header fields */
374 flags |= IP_RAWOUTPUT;
375 ipstat.ips_rawout++;
376 }
377 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions,
378 inp->inp_socket, &inp->inp_errormtu));
379 }
380
381 /*
382 * Raw IP socket option processing.
383 */
384 int
385 rip_ctloutput(op, so, level, optname, m)
386 int op;
387 struct socket *so;
388 int level, optname;
389 struct mbuf **m;
390 {
391 struct inpcb *inp = sotoinpcb(so);
392 int error = 0;
393
394 if (level != IPPROTO_IP) {
395 error = ENOPROTOOPT;
396 if (op == PRCO_SETOPT && *m != 0)
397 (void) m_free(*m);
398 } else switch (op) {
399
400 case PRCO_SETOPT:
401 switch (optname) {
402 case IP_HDRINCL:
403 if (*m == 0 || (*m)->m_len < sizeof (int))
404 error = EINVAL;
405 else {
406 if (*mtod(*m, int *))
407 inp->inp_flags |= INP_HDRINCL;
408 else
409 inp->inp_flags &= ~INP_HDRINCL;
410 }
411 if (*m != 0)
412 (void) m_free(*m);
413 break;
414
415 #ifdef MROUTING
416 case MRT_INIT:
417 case MRT_DONE:
418 case MRT_ADD_VIF:
419 case MRT_DEL_VIF:
420 case MRT_ADD_MFC:
421 case MRT_DEL_MFC:
422 case MRT_ASSERT:
423 error = ip_mrouter_set(so, optname, m);
424 break;
425 #endif
426
427 default:
428 error = ip_ctloutput(op, so, level, optname, m);
429 break;
430 }
431 break;
432
433 case PRCO_GETOPT:
434 switch (optname) {
435 case IP_HDRINCL:
436 *m = m_get(M_WAIT, MT_SOOPTS);
437 MCLAIM((*m), so->so_mowner);
438 (*m)->m_len = sizeof (int);
439 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
440 break;
441
442 #ifdef MROUTING
443 case MRT_VERSION:
444 case MRT_ASSERT:
445 error = ip_mrouter_get(so, optname, m);
446 break;
447 #endif
448
449 default:
450 error = ip_ctloutput(op, so, level, optname, m);
451 break;
452 }
453 break;
454 }
455 return (error);
456 }
457
458 int
459 rip_bind(inp, nam)
460 struct inpcb *inp;
461 struct mbuf *nam;
462 {
463 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
464
465 if (nam->m_len != sizeof(*addr))
466 return (EINVAL);
467 if (TAILQ_FIRST(&ifnet) == 0)
468 return (EADDRNOTAVAIL);
469 if (addr->sin_family != AF_INET &&
470 addr->sin_family != AF_IMPLINK)
471 return (EAFNOSUPPORT);
472 if (!in_nullhost(addr->sin_addr) &&
473 ifa_ifwithaddr(sintosa(addr)) == 0)
474 return (EADDRNOTAVAIL);
475 inp->inp_laddr = addr->sin_addr;
476 return (0);
477 }
478
479 int
480 rip_connect(inp, nam)
481 struct inpcb *inp;
482 struct mbuf *nam;
483 {
484 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
485
486 if (nam->m_len != sizeof(*addr))
487 return (EINVAL);
488 if (TAILQ_FIRST(&ifnet) == 0)
489 return (EADDRNOTAVAIL);
490 if (addr->sin_family != AF_INET &&
491 addr->sin_family != AF_IMPLINK)
492 return (EAFNOSUPPORT);
493 inp->inp_faddr = addr->sin_addr;
494 return (0);
495 }
496
497 void
498 rip_disconnect(inp)
499 struct inpcb *inp;
500 {
501
502 inp->inp_faddr = zeroin_addr;
503 }
504
505 u_long rip_sendspace = RIPSNDQ;
506 u_long rip_recvspace = RIPRCVQ;
507
508 /*ARGSUSED*/
509 int
510 rip_usrreq(so, req, m, nam, control, p)
511 struct socket *so;
512 int req;
513 struct mbuf *m, *nam, *control;
514 struct proc *p;
515 {
516 struct inpcb *inp;
517 int s;
518 int error = 0;
519 #ifdef MROUTING
520 extern struct socket *ip_mrouter;
521 #endif
522
523 if (req == PRU_CONTROL)
524 return (in_control(so, (long)m, (caddr_t)nam,
525 (struct ifnet *)control, p));
526
527 if (req == PRU_PURGEIF) {
528 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
529 in_purgeif((struct ifnet *)control);
530 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
531 return (0);
532 }
533
534 s = splsoftnet();
535 inp = sotoinpcb(so);
536 #ifdef DIAGNOSTIC
537 if (req != PRU_SEND && req != PRU_SENDOOB && control)
538 panic("rip_usrreq: unexpected control mbuf");
539 #endif
540 if (inp == 0 && req != PRU_ATTACH) {
541 error = EINVAL;
542 goto release;
543 }
544
545 switch (req) {
546
547 case PRU_ATTACH:
548 if (inp != 0) {
549 error = EISCONN;
550 break;
551 }
552 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
553 error = EACCES;
554 break;
555 }
556 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
557 error = soreserve(so, rip_sendspace, rip_recvspace);
558 if (error)
559 break;
560 }
561 error = in_pcballoc(so, &rawcbtable);
562 if (error)
563 break;
564 inp = sotoinpcb(so);
565 inp->inp_ip.ip_p = (long)nam;
566 break;
567
568 case PRU_DETACH:
569 #ifdef MROUTING
570 if (so == ip_mrouter)
571 ip_mrouter_done();
572 #endif
573 in_pcbdetach(inp);
574 break;
575
576 case PRU_BIND:
577 error = rip_bind(inp, nam);
578 break;
579
580 case PRU_LISTEN:
581 error = EOPNOTSUPP;
582 break;
583
584 case PRU_CONNECT:
585 error = rip_connect(inp, nam);
586 if (error)
587 break;
588 soisconnected(so);
589 break;
590
591 case PRU_CONNECT2:
592 error = EOPNOTSUPP;
593 break;
594
595 case PRU_DISCONNECT:
596 soisdisconnected(so);
597 rip_disconnect(inp);
598 break;
599
600 /*
601 * Mark the connection as being incapable of further input.
602 */
603 case PRU_SHUTDOWN:
604 socantsendmore(so);
605 break;
606
607 case PRU_RCVD:
608 error = EOPNOTSUPP;
609 break;
610
611 /*
612 * Ship a packet out. The appropriate raw output
613 * routine handles any massaging necessary.
614 */
615 case PRU_SEND:
616 if (control && control->m_len) {
617 m_freem(control);
618 m_freem(m);
619 error = EINVAL;
620 break;
621 }
622 {
623 if (nam) {
624 if ((so->so_state & SS_ISCONNECTED) != 0) {
625 error = EISCONN;
626 goto die;
627 }
628 error = rip_connect(inp, nam);
629 if (error) {
630 die:
631 m_freem(m);
632 break;
633 }
634 } else {
635 if ((so->so_state & SS_ISCONNECTED) == 0) {
636 error = ENOTCONN;
637 goto die;
638 }
639 }
640 error = rip_output(m, inp);
641 if (nam)
642 rip_disconnect(inp);
643 }
644 break;
645
646 case PRU_SENSE:
647 /*
648 * stat: don't bother with a blocksize.
649 */
650 splx(s);
651 return (0);
652
653 case PRU_RCVOOB:
654 error = EOPNOTSUPP;
655 break;
656
657 case PRU_SENDOOB:
658 m_freem(control);
659 m_freem(m);
660 error = EOPNOTSUPP;
661 break;
662
663 case PRU_SOCKADDR:
664 in_setsockaddr(inp, nam);
665 break;
666
667 case PRU_PEERADDR:
668 in_setpeeraddr(inp, nam);
669 break;
670
671 default:
672 panic("rip_usrreq");
673 }
674
675 release:
676 splx(s);
677 return (error);
678 }
679