/*	$NetBSD: raw_ip.c,v 1.55 2001/02/26 07:20:44 itojun Exp $	*/
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1993
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. All advertising materials mentioning features or use of this software
45 * must display the following acknowledgement:
46 * This product includes software developed by the University of
47 * California, Berkeley and its contributors.
48 * 4. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
51 *
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62 * SUCH DAMAGE.
63 *
64 * @(#)raw_ip.c 8.7 (Berkeley) 5/15/95
65 */
66
67 #include "opt_ipsec.h"
68 #include "opt_mrouting.h"
69
70 #include <sys/param.h>
71 #include <sys/malloc.h>
72 #include <sys/mbuf.h>
73 #include <sys/socket.h>
74 #include <sys/protosw.h>
75 #include <sys/socketvar.h>
76 #include <sys/errno.h>
77 #include <sys/systm.h>
78 #include <sys/proc.h>
79
80 #include <net/if.h>
81 #include <net/route.h>
82
83 #include <netinet/in.h>
84 #include <netinet/in_systm.h>
85 #include <netinet/ip.h>
86 #include <netinet/ip_var.h>
87 #include <netinet/ip_mroute.h>
88 #include <netinet/ip_icmp.h>
89 #include <netinet/in_pcb.h>
90 #include <netinet/in_var.h>
91
92 #include <machine/stdarg.h>
93
94 #ifdef IPSEC
95 #include <netinet6/ipsec.h>
96 #endif /*IPSEC*/
97
/*
 * Table of protocol control blocks for raw IP sockets.  It is set up by
 * rip_init(), walked by rip_input() to find receivers, and handed to
 * in_pcballoc() when a socket is attached in rip_usrreq().
 */
struct inpcbtable rawcbtable;

/* Address helpers used by rip_usrreq(); defined further down in this file. */
int	 rip_bind __P((struct inpcb *, struct mbuf *));
int	 rip_connect __P((struct inpcb *, struct mbuf *));
void	 rip_disconnect __P((struct inpcb *));

/*
 * Nominal space allocated to a raw ip socket.
 * These are the initial values of rip_sendspace and rip_recvspace.
 */
#define	RIPSNDQ		8192
#define	RIPRCVQ		8192
109
110 /*
111 * Raw interface to IP protocol.
112 */
113
114 /*
115 * Initialize raw connection block q.
116 */
117 void
118 rip_init()
119 {
120
121 in_pcbinit(&rawcbtable, 1, 1);
122 }
123
/*
 * File-scope source-address template (length and family pre-set).
 * NOTE(review): rip_input() declares a local variable with the same name
 * that shadows this one, and nothing else in the visible file refers to
 * it -- it looks like a leftover; confirm before removing.
 */
static struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET };
125
126 /*
127 * Setup generic address and protocol structures
128 * for raw_input routine, then pass them along with
129 * mbuf chain.
130 */
131 void
132 #if __STDC__
133 rip_input(struct mbuf *m, ...)
134 #else
135 rip_input(m, va_alist)
136 struct mbuf *m;
137 va_dcl
138 #endif
139 {
140 int off, proto;
141 struct ip *ip = mtod(m, struct ip *);
142 struct inpcb *inp;
143 struct inpcb *last = 0;
144 struct mbuf *opts = 0;
145 struct sockaddr_in ripsrc;
146 va_list ap;
147
148 va_start(ap, m);
149 off = va_arg(ap, int);
150 proto = va_arg(ap, int);
151 va_end(ap);
152
153 ripsrc.sin_family = AF_INET;
154 ripsrc.sin_len = sizeof(struct sockaddr_in);
155 ripsrc.sin_addr = ip->ip_src;
156 ripsrc.sin_port = 0;
157 bzero((caddr_t)ripsrc.sin_zero, sizeof(ripsrc.sin_zero));
158
159 /*
160 * XXX Compatibility: programs using raw IP expect ip_len
161 * XXX to have the header length subtracted.
162 */
163 ip->ip_len -= ip->ip_hl << 2;
164
165 for (inp = rawcbtable.inpt_queue.cqh_first;
166 inp != (struct inpcb *)&rawcbtable.inpt_queue;
167 inp = inp->inp_queue.cqe_next) {
168 if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != proto)
169 continue;
170 if (!in_nullhost(inp->inp_laddr) &&
171 !in_hosteq(inp->inp_laddr, ip->ip_dst))
172 continue;
173 if (!in_nullhost(inp->inp_faddr) &&
174 !in_hosteq(inp->inp_faddr, ip->ip_src))
175 continue;
176 if (last) {
177 struct mbuf *n;
178
179 #ifdef IPSEC
180 /* check AH/ESP integrity. */
181 if (ipsec4_in_reject_so(m, last->inp_socket)) {
182 ipsecstat.in_polvio++;
183 /* do not inject data to pcb */
184 } else
185 #endif /*IPSEC*/
186 if ((n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
187 if (last->inp_flags & INP_CONTROLOPTS ||
188 last->inp_socket->so_options & SO_TIMESTAMP)
189 ip_savecontrol(last, &opts, ip, n);
190 if (sbappendaddr(&last->inp_socket->so_rcv,
191 sintosa(&ripsrc), n, opts) == 0) {
192 /* should notify about lost packet */
193 m_freem(n);
194 if (opts)
195 m_freem(opts);
196 } else
197 sorwakeup(last->inp_socket);
198 opts = NULL;
199 }
200 }
201 last = inp;
202 }
203 #ifdef IPSEC
204 /* check AH/ESP integrity. */
205 if (last && ipsec4_in_reject_so(m, last->inp_socket)) {
206 m_freem(m);
207 ipsecstat.in_polvio++;
208 ipstat.ips_delivered--;
209 /* do not inject data to pcb */
210 } else
211 #endif /*IPSEC*/
212 if (last) {
213 if (last->inp_flags & INP_CONTROLOPTS ||
214 last->inp_socket->so_options & SO_TIMESTAMP)
215 ip_savecontrol(last, &opts, ip, m);
216 if (sbappendaddr(&last->inp_socket->so_rcv,
217 sintosa(&ripsrc), m, opts) == 0) {
218 m_freem(m);
219 if (opts)
220 m_freem(opts);
221 } else
222 sorwakeup(last->inp_socket);
223 } else {
224 if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
225 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
226 0, 0);
227 ipstat.ips_noproto++;
228 ipstat.ips_delivered--;
229 } else
230 m_freem(m);
231 }
232 return;
233 }
234
235 /*
236 * Generate IP header and pass packet to ip_output.
237 * Tack on options user may have setup with control call.
238 */
239 int
240 #if __STDC__
241 rip_output(struct mbuf *m, ...)
242 #else
243 rip_output(m, va_alist)
244 struct mbuf *m;
245 va_dcl
246 #endif
247 {
248 struct inpcb *inp;
249 struct ip *ip;
250 struct mbuf *opts;
251 int flags;
252 va_list ap;
253
254 va_start(ap, m);
255 inp = va_arg(ap, struct inpcb *);
256 va_end(ap);
257
258 flags =
259 (inp->inp_socket->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST
260 | IP_RETURNMTU;
261
262 /*
263 * If the user handed us a complete IP packet, use it.
264 * Otherwise, allocate an mbuf for a header and fill it in.
265 */
266 if ((inp->inp_flags & INP_HDRINCL) == 0) {
267 if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
268 m_freem(m);
269 return (EMSGSIZE);
270 }
271 M_PREPEND(m, sizeof(struct ip), M_WAIT);
272 ip = mtod(m, struct ip *);
273 ip->ip_tos = 0;
274 ip->ip_off = 0;
275 ip->ip_p = inp->inp_ip.ip_p;
276 ip->ip_len = m->m_pkthdr.len;
277 ip->ip_src = inp->inp_laddr;
278 ip->ip_dst = inp->inp_faddr;
279 ip->ip_ttl = MAXTTL;
280 opts = inp->inp_options;
281 } else {
282 if (m->m_pkthdr.len > IP_MAXPACKET) {
283 m_freem(m);
284 return (EMSGSIZE);
285 }
286 ip = mtod(m, struct ip *);
287 if (m->m_pkthdr.len != ip->ip_len) {
288 m_freem(m);
289 return (EINVAL);
290 }
291 if (ip->ip_id == 0)
292 ip->ip_id = htons(ip_id++);
293 opts = NULL;
294 /* XXX prevent ip_output from overwriting header fields */
295 flags |= IP_RAWOUTPUT;
296 ipstat.ips_rawout++;
297 }
298 #ifdef IPSEC
299 if (ipsec_setsocket(m, inp->inp_socket) != 0) {
300 m_freem(m);
301 return ENOBUFS;
302 }
303 #endif /*IPSEC*/
304 return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions, &inp->inp_errormtu));
305 }
306
307 /*
308 * Raw IP socket option processing.
309 */
310 int
311 rip_ctloutput(op, so, level, optname, m)
312 int op;
313 struct socket *so;
314 int level, optname;
315 struct mbuf **m;
316 {
317 struct inpcb *inp = sotoinpcb(so);
318 int error = 0;
319
320 if (level != IPPROTO_IP) {
321 error = ENOPROTOOPT;
322 if (op == PRCO_SETOPT && *m != 0)
323 (void) m_free(*m);
324 } else switch (op) {
325
326 case PRCO_SETOPT:
327 switch (optname) {
328 case IP_HDRINCL:
329 if (*m == 0 || (*m)->m_len < sizeof (int))
330 error = EINVAL;
331 else {
332 if (*mtod(*m, int *))
333 inp->inp_flags |= INP_HDRINCL;
334 else
335 inp->inp_flags &= ~INP_HDRINCL;
336 }
337 if (*m != 0)
338 (void) m_free(*m);
339 break;
340
341 #ifdef MROUTING
342 case MRT_INIT:
343 case MRT_DONE:
344 case MRT_ADD_VIF:
345 case MRT_DEL_VIF:
346 case MRT_ADD_MFC:
347 case MRT_DEL_MFC:
348 case MRT_ASSERT:
349 error = ip_mrouter_set(so, optname, m);
350 break;
351 #endif
352
353 default:
354 error = ip_ctloutput(op, so, level, optname, m);
355 break;
356 }
357 break;
358
359 case PRCO_GETOPT:
360 switch (optname) {
361 case IP_HDRINCL:
362 *m = m_get(M_WAIT, M_SOOPTS);
363 (*m)->m_len = sizeof (int);
364 *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL ? 1 : 0;
365 break;
366
367 #ifdef MROUTING
368 case MRT_VERSION:
369 case MRT_ASSERT:
370 error = ip_mrouter_get(so, optname, m);
371 break;
372 #endif
373
374 default:
375 error = ip_ctloutput(op, so, level, optname, m);
376 break;
377 }
378 break;
379 }
380 return (error);
381 }
382
383 int
384 rip_bind(inp, nam)
385 struct inpcb *inp;
386 struct mbuf *nam;
387 {
388 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
389
390 if (nam->m_len != sizeof(*addr))
391 return (EINVAL);
392 if (ifnet.tqh_first == 0)
393 return (EADDRNOTAVAIL);
394 if (addr->sin_family != AF_INET &&
395 addr->sin_family != AF_IMPLINK)
396 return (EAFNOSUPPORT);
397 if (!in_nullhost(addr->sin_addr) &&
398 ifa_ifwithaddr(sintosa(addr)) == 0)
399 return (EADDRNOTAVAIL);
400 inp->inp_laddr = addr->sin_addr;
401 return (0);
402 }
403
404 int
405 rip_connect(inp, nam)
406 struct inpcb *inp;
407 struct mbuf *nam;
408 {
409 struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
410
411 if (nam->m_len != sizeof(*addr))
412 return (EINVAL);
413 if (ifnet.tqh_first == 0)
414 return (EADDRNOTAVAIL);
415 if (addr->sin_family != AF_INET &&
416 addr->sin_family != AF_IMPLINK)
417 return (EAFNOSUPPORT);
418 inp->inp_faddr = addr->sin_addr;
419 return (0);
420 }
421
422 void
423 rip_disconnect(inp)
424 struct inpcb *inp;
425 {
426
427 inp->inp_faddr = zeroin_addr;
428 }
429
/*
 * Send and receive buffer reservations requested via soreserve() when a
 * raw socket is attached without buffer space already set up.
 */
u_long	rip_sendspace = RIPSNDQ;
u_long	rip_recvspace = RIPRCVQ;
432
433 /*ARGSUSED*/
434 int
435 rip_usrreq(so, req, m, nam, control, p)
436 struct socket *so;
437 int req;
438 struct mbuf *m, *nam, *control;
439 struct proc *p;
440 {
441 struct inpcb *inp;
442 int s;
443 int error = 0;
444 #ifdef MROUTING
445 extern struct socket *ip_mrouter;
446 #endif
447
448 if (req == PRU_CONTROL)
449 return (in_control(so, (long)m, (caddr_t)nam,
450 (struct ifnet *)control, p));
451
452 if (req == PRU_PURGEIF) {
453 in_purgeif((struct ifnet *)control);
454 in_pcbpurgeif(&rawcbtable, (struct ifnet *)control);
455 return (0);
456 }
457
458 s = splsoftnet();
459 inp = sotoinpcb(so);
460 #ifdef DIAGNOSTIC
461 if (req != PRU_SEND && req != PRU_SENDOOB && control)
462 panic("rip_usrreq: unexpected control mbuf");
463 #endif
464 if (inp == 0 && req != PRU_ATTACH) {
465 error = EINVAL;
466 goto release;
467 }
468
469 switch (req) {
470
471 case PRU_ATTACH:
472 if (inp != 0) {
473 error = EISCONN;
474 break;
475 }
476 if (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))) {
477 error = EACCES;
478 break;
479 }
480 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
481 error = soreserve(so, rip_sendspace, rip_recvspace);
482 if (error)
483 break;
484 }
485 error = in_pcballoc(so, &rawcbtable);
486 if (error)
487 break;
488 inp = sotoinpcb(so);
489 inp->inp_ip.ip_p = (long)nam;
490 #ifdef IPSEC
491 error = ipsec_init_policy(so, &inp->inp_sp);
492 if (error != 0) {
493 in_pcbdetach(inp);
494 break;
495 }
496 #endif /*IPSEC*/
497 break;
498
499 case PRU_DETACH:
500 #ifdef MROUTING
501 if (so == ip_mrouter)
502 ip_mrouter_done();
503 #endif
504 in_pcbdetach(inp);
505 break;
506
507 case PRU_BIND:
508 error = rip_bind(inp, nam);
509 break;
510
511 case PRU_LISTEN:
512 error = EOPNOTSUPP;
513 break;
514
515 case PRU_CONNECT:
516 error = rip_connect(inp, nam);
517 if (error)
518 break;
519 soisconnected(so);
520 break;
521
522 case PRU_CONNECT2:
523 error = EOPNOTSUPP;
524 break;
525
526 case PRU_DISCONNECT:
527 soisdisconnected(so);
528 rip_disconnect(inp);
529 break;
530
531 /*
532 * Mark the connection as being incapable of further input.
533 */
534 case PRU_SHUTDOWN:
535 socantsendmore(so);
536 break;
537
538 case PRU_RCVD:
539 error = EOPNOTSUPP;
540 break;
541
542 /*
543 * Ship a packet out. The appropriate raw output
544 * routine handles any massaging necessary.
545 */
546 case PRU_SEND:
547 if (control && control->m_len) {
548 m_freem(control);
549 m_freem(m);
550 error = EINVAL;
551 break;
552 }
553 {
554 if (nam) {
555 if ((so->so_state & SS_ISCONNECTED) != 0) {
556 error = EISCONN;
557 goto die;
558 }
559 error = rip_connect(inp, nam);
560 if (error) {
561 die:
562 m_freem(m);
563 break;
564 }
565 } else {
566 if ((so->so_state & SS_ISCONNECTED) == 0) {
567 error = ENOTCONN;
568 goto die;
569 }
570 }
571 error = rip_output(m, inp);
572 if (nam)
573 rip_disconnect(inp);
574 }
575 break;
576
577 case PRU_SENSE:
578 /*
579 * stat: don't bother with a blocksize.
580 */
581 splx(s);
582 return (0);
583
584 case PRU_RCVOOB:
585 error = EOPNOTSUPP;
586 break;
587
588 case PRU_SENDOOB:
589 m_freem(control);
590 m_freem(m);
591 error = EOPNOTSUPP;
592 break;
593
594 case PRU_SOCKADDR:
595 in_setsockaddr(inp, nam);
596 break;
597
598 case PRU_PEERADDR:
599 in_setpeeraddr(inp, nam);
600 break;
601
602 default:
603 panic("rip_usrreq");
604 }
605
606 release:
607 splx(s);
608 return (error);
609 }
610