raw_ip.c revision 1.55.2.2 1 /* $NetBSD: raw_ip.c,v 1.55.2.2 2001/11/14 19:17:55 nathanw Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
65 */
66
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.55.2.2 2001/11/14 19:17:55 nathanw Exp $");
69
70 #include "opt_ipsec.h"
71 #include "opt_mrouting.h"
72
73 #include <sys/param.h>
74 #include <sys/malloc.h>
75 #include <sys/mbuf.h>
76 #include <sys/socket.h>
77 #include <sys/protosw.h>
78 #include <sys/socketvar.h>
79 #include <sys/errno.h>
80 #include <sys/systm.h>
81 #include <sys/proc.h>
82
83 #include <net/if.h>
84 #include <net/route.h>
85
86 #include <netinet/in.h>
87 #include <netinet/in_systm.h>
88 #include <netinet/ip.h>
89 #include <netinet/ip_var.h>
90 #include <netinet/ip_mroute.h>
91 #include <netinet/ip_icmp.h>
92 #include <netinet/in_pcb.h>
93 #include <netinet/in_var.h>
94
95 #include <machine/stdarg.h>
96
97 #ifdef IPSEC
98 #include <netinet6/ipsec.h>
99 #endif /*IPSEC*/
100
101 struct inpcbtable rawcbtable;
102
103 int rip_bind __P((struct inpcb *, struct mbuf *));
104 int rip_connect __P((struct inpcb *, struct mbuf *));
105 void rip_disconnect __P((struct inpcb *));
106
107 /*
108 * Nominal space allocated to a raw ip socket.
109 */
110 #define RIPSNDQ 8192
111 #define RIPRCVQ 8192
112
113 /*
114 * Raw interface to IP protocol.
115 */
116
117 /*
118 * Initialize raw connection block q.
119 */
120 void
121 rip_init()
122 {
123
124 in_pcbinit(&rawcbtable, 1, 1);
125 }
126
127 static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
128
129 /*
130 * Setup generic address and protocol structures
131 * for raw_input routine, then pass them along with
132 * mbuf chain.
133 */
134 void
135 #if __STDC__
136 rip_input(struct mbuf *m, ...)
137 #else
138 rip_input(m, va_alist)
139 struct mbuf *m;
140 va_dcl
141 #endif
142 {
143 int off, proto;
144 struct ip *ip = mtod(m, struct ip *);
145 struct inpcb *inp;
146 struct inpcb *last = 0;
147 struct mbuf *opts = 0;
148 struct sockaddr_in ripsrc;
149 va_list ap;
150
151 va_start(ap, m);
152 off = va_arg(ap, int);
153 proto = va_arg(ap, int);
154 va_end(ap);
155
156 ripsrc.sin_family = AF_INET;
157 ripsrc.sin_len = sizeof(struct sockaddr_in);
158 ripsrc.sin_addr = ip->ip_src;
159 ripsrc.sin_port = 0;
160 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
161
162 /*
163 * XXX Compatibility: programs using raw IP expect ip_len
164 * XXX to have the header length subtracted.
165 */
166 ip->ip_len -= ip->ip_hl << 2;
167
168 CIRCLEQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
169 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
170 continue;
171 if (!in_nullhost(inp->inp_laddr) &&
172 !in_hosteq(inp->inp_laddr, ip->ip_dst))
173 continue;
174 if (!in_nullhost(inp->inp_faddr) &&
175 !in_hosteq(inp->inp_faddr, ip->ip_src))
176 continue;
177 if (last) {
178 struct mbuf *n;
179
180 #ifdef IPSEC
181 /* check AH/ESP integrity. */
182 if (ipsec4_in_reject_so(m, last->inp_socket)) {
183 ipsecstat.in_polvio++;
184 /* do not inject data to pcb */
185 } else
186 #endif /*IPSEC*/
187 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
188 if (last->inp_flags & INP_CONTROLOPTS ||
189 last->inp_socket->so_options & SO_TIMESTAMP)
190 ip_savecontrol(last, &opts, ip, n);
191 if (sbappendaddr(&last->inp_socket->so_rcv,
192 sintosa(&ripsrc), n, opts) == 0) {
193 /* should notify about lost packet */
194 m_freem(n);
195 if (opts)
196 m_freem(opts);
197 } else
198 sorwakeup(last->inp_socket);
199 opts = NULL;
200 }
201 }
202 last = inp;
203 }
204 #ifdef IPSEC
205 /* check AH/ESP integrity. */
206 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
207 m_freem(m);
208 ipsecstat.in_polvio++;
209 ipstat.ips_delivered--;
210 /* do not inject data to pcb */
211 } else
212 #endif /*IPSEC*/
213 if (last) {
214 if (last->inp_flags & INP_CONTROLOPTS ||
215 last->inp_socket->so_options & SO_TIMESTAMP)
216 ip_savecontrol(last, &opts, ip, m);
217 if (sbappendaddr(&last->inp_socket->so_rcv,
218 sintosa(&ripsrc), m, opts) == 0) {
219 m_freem(m);
220 if (opts)
221 m_freem(opts);
222 } else
223 sorwakeup(last->inp_socket);
224 } else {
225 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
226 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
227 0, 0);
228 ipstat.ips_noproto++;
229 ipstat.ips_delivered--;
230 } else
231 m_freem(m);
232 }
233 return;
234 }
235
236 /*
237 * Generate IP header and pass packet to ip_output.
238 * Tack on options user may have setup with control call.
239 */
240 int
241 #if __STDC__
242 rip_output(struct mbuf *m, ...)
243 #else
244 rip_output(m, va_alist)
245 struct mbuf *m;
246 va_dcl
247 #endif
248 {
249 struct inpcb *inp;
250 struct ip *ip;
251 struct mbuf *opts;
252 int flags;
253 va_list ap;
254
255 va_start(ap, m);
256 inp = va_arg(ap, struct inpcb *);
257 va_end(ap);
258
259 flags =
260 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
261 | IP_RETURNMTU;
262
263 /*
264 * If the user handed us a complete IP packet, use it.
265 * Otherwise, allocate an mbuf for a header and fill it in.
266 */
267 if ((inp->inp_flags & INP_HDRINCL) == 0) {
268 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
269 m_freem(m);
270 return (EMSGSIZE);
271 }
272 M_PREPEND(m, sizeof(struct ip), M_WAIT);
273 ip = mtod(m, struct ip *);
274 ip->ip_tos = 0;
275 ip->ip_off = 0;
276 ip->ip_p = inp->inp_ip.ip_p;
277 ip->ip_len = m->m_pkthdr.len;
278 ip->ip_src = inp->inp_laddr;
279 ip->ip_dst = inp->inp_faddr;
280 ip->ip_ttl = MAXTTL;
281 opts = inp->inp_options;
282 } else {
283 if (m->m_pkthdr.len > IP_MAXPACKET) {
284 m_freem(m);
285 return (EMSGSIZE);
286 }
287 ip = mtod(m, struct ip *);
288 if (m->m_pkthdr.len != ip->ip_len) {
289 m_freem(m);
290 return (EINVAL);
291 }
292 if (ip->ip_id == 0)
293 ip->ip_id = htons(ip_id++);
294 opts = NULL;
295 /* XXX prevent ip_output from overwriting header fields */
296 flags |= IP_RAWOUTPUT;
297 ipstat.ips_rawout++;
298 }
299 #ifdef IPSEC
300 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
301 m_freem(m);
302 return ENOBUFS;
303 }
304 #endif /*IPSEC*/
305 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, &inp->inp_errormtu));
306 }
307
308 /*
309 * Raw IP socket option processing.
310 */
311 int
312 rip_ctloutput(op, so, level, optname, m)
313 int op;
314 struct socket *so;
315 int level, optname;
316 struct mbuf **m;
317 {
318 struct inpcb *inp = sotoinpcb(so);
319 int error = 0;
320
321 if (level != IPPROTO_IP) {
322 error = ENOPROTOOPT;
323 if (op == PRCO_SETOPT && *m != 0)
324 (void) m_free(*m);
325 } else switch (op) {
326
327 case PRCO_SETOPT:
328 switch (optname) {
329 case IP_HDRINCL:
330 if (*m == 0 || (*m)->m_len < sizeof (int))
331 error = EINVAL;
332 else {
333 if (*mtod(*m, int *))
334 inp->inp_flags |= INP_HDRINCL;
335 else
336 inp->inp_flags &= ~INP_HDRINCL;
337 }
338 if (*m != 0)
339 (void) m_free(*m);
340 break;
341
342 #ifdef MROUTING
343 case MRT_INIT:
344 case MRT_DONE:
345 case MRT_ADD_VIF:
346 case MRT_DEL_VIF:
347 case MRT_ADD_MFC:
348 case MRT_DEL_MFC:
349 case MRT_ASSERT:
350 error = ip_mrouter_set(so, optname, m);
351 break;
352 #endif
353
354 default:
355 error = ip_ctloutput(op, so, level, optname, m);
356 break;
357 }
358 break;
359
360 case PRCO_GETOPT:
361 switch (optname) {
362 case IP_HDRINCL:
363 *m = m_get(M_WAIT, M_SOOPTS);
364 (*m)->m_len = sizeof (int);
365 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
366 break;
367
368 #ifdef MROUTING
369 case MRT_VERSION:
370 case MRT_ASSERT:
371 error = ip_mrouter_get(so, optname, m);
372 break;
373 #endif
374
375 default:
376 error = ip_ctloutput(op, so, level, optname, m);
377 break;
378 }
379 break;
380 }
381 return (error);
382 }
383
384 int
385 rip_bind(inp, nam)
386 struct inpcb *inp;
387 struct mbuf *nam;
388 {
389 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
390
391 if (nam->m_len != sizeof(*addr))
392 return (EINVAL);
393 if (TAILQ_FIRST(&ifnet) == 0)
394 return (EADDRNOTAVAIL);
395 if (addr->sin_family != AF_INET &&
396 addr->sin_family != AF_IMPLINK)
397 return (EAFNOSUPPORT);
398 if (!in_nullhost(addr->sin_addr) &&
399 ifa_ifwithaddr(sintosa(addr)) == 0)
400 return (EADDRNOTAVAIL);
401 inp->inp_laddr = addr->sin_addr;
402 return (0);
403 }
404
405 int
406 rip_connect(inp, nam)
407 struct inpcb *inp;
408 struct mbuf *nam;
409 {
410 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
411
412 if (nam->m_len != sizeof(*addr))
413 return (EINVAL);
414 if (TAILQ_FIRST(&ifnet) == 0)
415 return (EADDRNOTAVAIL);
416 if (addr->sin_family != AF_INET &&
417 addr->sin_family != AF_IMPLINK)
418 return (EAFNOSUPPORT);
419 inp->inp_faddr = addr->sin_addr;
420 return (0);
421 }
422
423 void
424 rip_disconnect(inp)
425 struct inpcb *inp;
426 {
427
428 inp->inp_faddr = zeroin_addr;
429 }
430
431 u_long rip_sendspace = RIPSNDQ;
432 u_long rip_recvspace = RIPRCVQ;
433
434 /*ARGSUSED*/
435 int
436 rip_usrreq(so, req, m, nam, control, p)
437 struct socket *so;
438 int req;
439 struct mbuf *m, *nam, *control;
440 struct proc *p;
441 {
442 struct inpcb *inp;
443 int s;
444 int error = 0;
445 #ifdef MROUTING
446 extern struct socket *ip_mrouter;
447 #endif
448
449 if (req == PRU_CONTROL)
450 return (in_control(so, (long)m, (caddr_t)nam,
451 (struct ifnet *)control, p));
452
453 if (req == PRU_PURGEIF) {
454 in_pcbpurgeif0(&rawcbtable, (struct ifnet *)control);
455 in_purgeif((struct ifnet *)control);
456 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
457 return (0);
458 }
459
460 s = splsoftnet();
461 inp = sotoinpcb(so);
462 #ifdef DIAGNOSTIC
463 if (req != PRU_SEND && req != PRU_SENDOOB && control)
464 panic("rip_usrreq: unexpected control mbuf");
465 #endif
466 if (inp == 0 && req != PRU_ATTACH) {
467 error = EINVAL;
468 goto release;
469 }
470
471 switch (req) {
472
473 case PRU_ATTACH:
474 if (inp != 0) {
475 error = EISCONN;
476 break;
477 }
478 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
479 error = EACCES;
480 break;
481 }
482 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
483 error = soreserve(so, rip_sendspace, rip_recvspace);
484 if (error)
485 break;
486 }
487 error = in_pcballoc(so, &rawcbtable);
488 if (error)
489 break;
490 inp = sotoinpcb(so);
491 inp->inp_ip.ip_p = (long)nam;
492 break;
493
494 case PRU_DETACH:
495 #ifdef MROUTING
496 if (so == ip_mrouter)
497 ip_mrouter_done();
498 #endif
499 in_pcbdetach(inp);
500 break;
501
502 case PRU_BIND:
503 error = rip_bind(inp, nam);
504 break;
505
506 case PRU_LISTEN:
507 error = EOPNOTSUPP;
508 break;
509
510 case PRU_CONNECT:
511 error = rip_connect(inp, nam);
512 if (error)
513 break;
514 soisconnected(so);
515 break;
516
517 case PRU_CONNECT2:
518 error = EOPNOTSUPP;
519 break;
520
521 case PRU_DISCONNECT:
522 soisdisconnected(so);
523 rip_disconnect(inp);
524 break;
525
526 /*
527 * Mark the connection as being incapable of further input.
528 */
529 case PRU_SHUTDOWN:
530 socantsendmore(so);
531 break;
532
533 case PRU_RCVD:
534 error = EOPNOTSUPP;
535 break;
536
537 /*
538 * Ship a packet out. The appropriate raw output
539 * routine handles any massaging necessary.
540 */
541 case PRU_SEND:
542 if (control && control->m_len) {
543 m_freem(control);
544 m_freem(m);
545 error = EINVAL;
546 break;
547 }
548 {
549 if (nam) {
550 if ((so->so_state & SS_ISCONNECTED) != 0) {
551 error = EISCONN;
552 goto die;
553 }
554 error = rip_connect(inp, nam);
555 if (error) {
556 die:
557 m_freem(m);
558 break;
559 }
560 } else {
561 if ((so->so_state & SS_ISCONNECTED) == 0) {
562 error = ENOTCONN;
563 goto die;
564 }
565 }
566 error = rip_output(m, inp);
567 if (nam)
568 rip_disconnect(inp);
569 }
570 break;
571
572 case PRU_SENSE:
573 /*
574 * stat: don't bother with a blocksize.
575 */
576 splx(s);
577 return (0);
578
579 case PRU_RCVOOB:
580 error = EOPNOTSUPP;
581 break;
582
583 case PRU_SENDOOB:
584 m_freem(control);
585 m_freem(m);
586 error = EOPNOTSUPP;
587 break;
588
589 case PRU_SOCKADDR:
590 in_setsockaddr(inp, nam);
591 break;
592
593 case PRU_PEERADDR:
594 in_setpeeraddr(inp, nam);
595 break;
596
597 default:
598 panic("rip_usrreq");
599 }
600
601 release:
602 splx(s);
603 return (error);
604 }
605