Home | History | Annotate | Line # | Download | only in netinet
raw_ip.c revision 1.116.2.1
      1  1.116.2.1     rmind /*	$NetBSD: raw_ip.c,v 1.116.2.1 2013/07/17 03:16:31 rmind Exp $	*/
      2       1.43    itojun 
      3       1.43    itojun /*
      4       1.43    itojun  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
      5       1.43    itojun  * All rights reserved.
      6       1.61    itojun  *
      7       1.43    itojun  * Redistribution and use in source and binary forms, with or without
      8       1.43    itojun  * modification, are permitted provided that the following conditions
      9       1.43    itojun  * are met:
     10       1.43    itojun  * 1. Redistributions of source code must retain the above copyright
     11       1.43    itojun  *    notice, this list of conditions and the following disclaimer.
     12       1.43    itojun  * 2. Redistributions in binary form must reproduce the above copyright
     13       1.43    itojun  *    notice, this list of conditions and the following disclaimer in the
     14       1.43    itojun  *    documentation and/or other materials provided with the distribution.
     15       1.43    itojun  * 3. Neither the name of the project nor the names of its contributors
     16       1.43    itojun  *    may be used to endorse or promote products derived from this software
     17       1.43    itojun  *    without specific prior written permission.
     18       1.61    itojun  *
     19       1.43    itojun  * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
     20       1.43    itojun  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21       1.43    itojun  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22       1.43    itojun  * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
     23       1.43    itojun  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24       1.43    itojun  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25       1.43    itojun  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26       1.43    itojun  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27       1.43    itojun  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28       1.43    itojun  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29       1.43    itojun  * SUCH DAMAGE.
     30       1.43    itojun  */
     31       1.14       cgd 
     32        1.1       cgd /*
     33       1.13   mycroft  * Copyright (c) 1982, 1986, 1988, 1993
     34       1.13   mycroft  *	The Regents of the University of California.  All rights reserved.
     35        1.1       cgd  *
     36        1.1       cgd  * Redistribution and use in source and binary forms, with or without
     37        1.1       cgd  * modification, are permitted provided that the following conditions
     38        1.1       cgd  * are met:
     39        1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     40        1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     41        1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     42        1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     43        1.1       cgd  *    documentation and/or other materials provided with the distribution.
     44       1.71       agc  * 3. Neither the name of the University nor the names of its contributors
     45        1.1       cgd  *    may be used to endorse or promote products derived from this software
     46        1.1       cgd  *    without specific prior written permission.
     47        1.1       cgd  *
     48        1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     49        1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     50        1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     51        1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     52        1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     53        1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     54        1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     55        1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     56        1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     57        1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     58        1.1       cgd  * SUCH DAMAGE.
     59        1.1       cgd  *
     60       1.39   thorpej  *	@(#)raw_ip.c	8.7 (Berkeley) 5/15/95
     61        1.1       cgd  */
     62       1.59     lukem 
     63  1.116.2.1     rmind /*
     64  1.116.2.1     rmind  * Raw interface to IP protocol.
     65  1.116.2.1     rmind  */
     66  1.116.2.1     rmind 
     67       1.59     lukem #include <sys/cdefs.h>
     68  1.116.2.1     rmind __KERNEL_RCSID(0, "$NetBSD: raw_ip.c,v 1.116.2.1 2013/07/17 03:16:31 rmind Exp $");
     69       1.40    scottr 
     70       1.78  jonathan #include "opt_inet.h"
     71      1.109  christos #include "opt_compat_netbsd.h"
     72       1.45   thorpej #include "opt_ipsec.h"
     73       1.40    scottr #include "opt_mrouting.h"
     74        1.1       cgd 
     75        1.7   mycroft #include <sys/param.h>
     76       1.84    atatat #include <sys/sysctl.h>
     77        1.7   mycroft #include <sys/mbuf.h>
     78        1.7   mycroft #include <sys/socket.h>
     79        1.7   mycroft #include <sys/protosw.h>
     80        1.7   mycroft #include <sys/socketvar.h>
     81       1.13   mycroft #include <sys/systm.h>
     82       1.89      elad #include <sys/kauth.h>
     83        1.1       cgd 
     84        1.7   mycroft #include <net/if.h>
     85        1.7   mycroft #include <net/route.h>
     86        1.1       cgd 
     87        1.7   mycroft #include <netinet/in.h>
     88        1.7   mycroft #include <netinet/in_systm.h>
     89        1.7   mycroft #include <netinet/ip.h>
     90        1.7   mycroft #include <netinet/ip_var.h>
     91      1.105   thorpej #include <netinet/ip_private.h>
     92       1.13   mycroft #include <netinet/ip_mroute.h>
     93       1.44   darrenr #include <netinet/ip_icmp.h>
     94        1.7   mycroft #include <netinet/in_pcb.h>
     95       1.87      yamt #include <netinet/in_proto.h>
     96       1.24  christos #include <netinet/in_var.h>
     97       1.24  christos 
     98      1.116  christos #ifdef IPSEC
     99       1.72  jonathan #include <netipsec/ipsec.h>
    100      1.106   thorpej #include <netipsec/ipsec_var.h>
    101      1.106   thorpej #include <netipsec/ipsec_private.h>
    102  1.116.2.1     rmind #endif
    103       1.72  jonathan 
    104      1.109  christos #ifdef COMPAT_50
    105      1.109  christos #include <compat/sys/socket.h>
    106      1.109  christos #endif
    107      1.109  christos 
    108  1.116.2.1     rmind static inpcbtable_t *	rawcbtable __read_mostly;
    109       1.13   mycroft 
    110  1.116.2.1     rmind static void		sysctl_net_inet_raw_setup(struct sysctllog **);
    111      1.110     pooka 
    112       1.13   mycroft /*
    113       1.13   mycroft  * Nominal space allocated to a raw ip socket.
    114       1.13   mycroft  */
    115       1.13   mycroft #define	RIPSNDQ		8192
    116       1.13   mycroft #define	RIPRCVQ		8192
    117        1.1       cgd 
    118  1.116.2.1     rmind static u_long		rip_sendspace = RIPSNDQ;
    119  1.116.2.1     rmind static u_long		rip_recvspace = RIPRCVQ;
    120  1.116.2.1     rmind 
    121  1.116.2.1     rmind struct rip_input_ctx {
    122  1.116.2.1     rmind 	struct mbuf *		mbuf;
    123  1.116.2.1     rmind 	struct ip *		ip;
    124  1.116.2.1     rmind 	struct sockaddr_in	src;
    125  1.116.2.1     rmind 	unsigned		hlen;
    126  1.116.2.1     rmind 	unsigned		nfound;
    127  1.116.2.1     rmind };
    128  1.116.2.1     rmind 
    129  1.116.2.1     rmind struct rip_ctlinput_ctx {
    130  1.116.2.1     rmind 	struct ip *		ip;
    131  1.116.2.1     rmind 	struct in_addr		addr;
    132  1.116.2.1     rmind 	int			errno;
    133  1.116.2.1     rmind };
    134       1.13   mycroft 
    135       1.13   mycroft void
    136       1.83     perry rip_init(void)
    137       1.13   mycroft {
    138  1.116.2.1     rmind 	rawcbtable = inpcb_init(1, 1, 0);
    139      1.110     pooka 	sysctl_net_inet_raw_setup(NULL);
    140       1.13   mycroft }
    141       1.13   mycroft 
    142  1.116.2.1     rmind /*
    143  1.116.2.1     rmind  * rip_append: pass the received datagram to the process.
    144  1.116.2.1     rmind  */
    145      1.100    dyoung static void
    146  1.116.2.1     rmind rip_append(inpcb_t *inp, struct rip_input_ctx *rctx)
    147      1.100    dyoung {
    148  1.116.2.1     rmind 	struct socket *so = inpcb_get_socket(inp);
    149  1.116.2.1     rmind 	int inpflags = inpcb_get_flags(inp);
    150  1.116.2.1     rmind 	struct mbuf *n, *opts = NULL;
    151  1.116.2.1     rmind 
    152  1.116.2.1     rmind 	/* XXX: Might optimise this, but not with a silly loop! */
    153  1.116.2.1     rmind 	if ((n = m_copypacket(rctx->mbuf, M_DONTWAIT)) == NULL) {
    154  1.116.2.1     rmind 		return;
    155  1.116.2.1     rmind 	}
    156  1.116.2.1     rmind 
    157  1.116.2.1     rmind 	if (inpflags & INP_NOHEADER) {
    158  1.116.2.1     rmind 		m_adj(n, rctx->hlen);
    159  1.116.2.1     rmind 	}
    160  1.116.2.1     rmind 
    161  1.116.2.1     rmind 	if ((inpflags & INP_CONTROLOPTS) != 0
    162      1.109  christos #ifdef SO_OTIMESTAMP
    163  1.116.2.1     rmind 	    || (so->so_options & SO_OTIMESTAMP) != 0
    164      1.109  christos #endif
    165  1.116.2.1     rmind 	    || (so->so_options & SO_TIMESTAMP) != 0) {
    166  1.116.2.1     rmind 		struct ip *ip = rctx->ip;
    167  1.116.2.1     rmind 		ip_savecontrol(inp, &opts, ip, n);
    168  1.116.2.1     rmind 	}
    169  1.116.2.1     rmind 
    170  1.116.2.1     rmind 	if (sbappendaddr(&so->so_rcv, sintosa(&rctx->src), n, opts) == 0) {
    171  1.116.2.1     rmind 		/* Should notify about lost packet. */
    172  1.116.2.1     rmind 		if (opts) {
    173      1.100    dyoung 			m_freem(opts);
    174  1.116.2.1     rmind 		}
    175  1.116.2.1     rmind 		m_freem(n);
    176  1.116.2.1     rmind 	} else {
    177  1.116.2.1     rmind 		sorwakeup(so);
    178  1.116.2.1     rmind 	}
    179  1.116.2.1     rmind }
    180  1.116.2.1     rmind 
    181  1.116.2.1     rmind static int
    182  1.116.2.1     rmind rip_pcb_process(inpcb_t *inp, void *arg)
    183  1.116.2.1     rmind {
    184  1.116.2.1     rmind 	struct rip_input_ctx *rctx = arg;
    185  1.116.2.1     rmind 	const struct ip *ip = rctx->ip;
    186  1.116.2.1     rmind 	struct ip *inp_ip = in_getiphdr(inp);
    187  1.116.2.1     rmind 	struct in_addr laddr, faddr;
    188  1.116.2.1     rmind 
    189  1.116.2.1     rmind 	if (inp_ip->ip_p && inp_ip->ip_p != ip->ip_p) {
    190  1.116.2.1     rmind 		return 0;
    191  1.116.2.1     rmind 	}
    192  1.116.2.1     rmind 	inpcb_get_addrs(inp, &laddr, &faddr);
    193  1.116.2.1     rmind 
    194  1.116.2.1     rmind 	if (!in_nullhost(laddr) && !in_hosteq(laddr, ip->ip_dst)) {
    195  1.116.2.1     rmind 		return 0;
    196  1.116.2.1     rmind 	}
    197  1.116.2.1     rmind 	if (!in_nullhost(faddr) && !in_hosteq(faddr, ip->ip_src)) {
    198  1.116.2.1     rmind 		return 0;
    199  1.116.2.1     rmind 	}
    200  1.116.2.1     rmind 
    201  1.116.2.1     rmind #if defined(IPSEC)
    202  1.116.2.1     rmind 	/* Check AH/ESP integrity. */
    203  1.116.2.1     rmind 	if (ipsec4_in_reject_so(rctx->mbuf, inpcb_get_socket(inp))) {
    204  1.116.2.1     rmind 		/* Do not inject data into PCB. */
    205  1.116.2.1     rmind 		IPSEC_STATINC(IPSEC_STAT_IN_POLVIO);
    206  1.116.2.1     rmind 		return 0;
    207  1.116.2.1     rmind 	}
    208  1.116.2.1     rmind #endif
    209  1.116.2.1     rmind 	rip_append(inp, rctx);
    210  1.116.2.1     rmind 	rctx->nfound++;
    211  1.116.2.1     rmind 	return 0;
    212      1.100    dyoung }
    213      1.100    dyoung 
    214        1.9   mycroft void
    215       1.24  christos rip_input(struct mbuf *m, ...)
    216        1.1       cgd {
    217       1.53  augustss 	struct ip *ip = mtod(m, struct ip *);
    218  1.116.2.1     rmind 	int error, hlen, proto;
    219       1.43    itojun 	va_list ap;
    220       1.43    itojun 
    221       1.43    itojun 	va_start(ap, m);
    222       1.64    simonb 	(void)va_arg(ap, int);		/* ignore value, advance ap */
    223       1.43    itojun 	proto = va_arg(ap, int);
    224       1.43    itojun 	va_end(ap);
    225        1.1       cgd 
    226  1.116.2.1     rmind 	KASSERTMSG((proto == ip->ip_p), "%s: protocol mismatch", __func__);
    227       1.42   thorpej 
    228       1.42   thorpej 	/*
    229  1.116.2.1     rmind 	 * Compatibility: programs using raw IP expect ip_len field to have
    230  1.116.2.1     rmind 	 * the header length subtracted.  Also, ip_len and ip_off fields are
    231  1.116.2.1     rmind 	 * expected to be in host order.
    232       1.42   thorpej 	 */
    233      1.100    dyoung 	hlen = ip->ip_hl << 2;
    234      1.100    dyoung 	ip->ip_len = ntohs(ip->ip_len) - hlen;
    235       1.62    itojun 	NTOHS(ip->ip_off);
    236       1.32   mycroft 
    237  1.116.2.1     rmind 	/* Save some context for the iterator. */
    238  1.116.2.1     rmind 	struct rip_input_ctx rctx = {
    239  1.116.2.1     rmind 		.mbuf = m, .ip = ip, .hlen = hlen, .nfound = 0
    240  1.116.2.1     rmind 	};
    241  1.116.2.1     rmind 	sockaddr_in_init(&rctx.src, &ip->ip_src, 0);
    242  1.116.2.1     rmind 
    243  1.116.2.1     rmind 	/* Scan all raw IP PCBs for matching entries. */
    244  1.116.2.1     rmind 	error = inpcb_foreach(rawcbtable, AF_INET, rip_pcb_process, &rctx);
    245  1.116.2.1     rmind 	KASSERT(error == 0);
    246  1.116.2.1     rmind 
    247  1.116.2.1     rmind 	/* Done, if found any. */
    248  1.116.2.1     rmind 	if (rctx.nfound) {
    249  1.116.2.1     rmind 		return;
    250       1.13   mycroft 	}
    251  1.116.2.1     rmind 
    252  1.116.2.1     rmind 	if (inetsw[ip_protox[ip->ip_p]].pr_input == rip_input) {
    253      1.105   thorpej 		uint64_t *ips;
    254      1.105   thorpej 
    255  1.116.2.1     rmind 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL, 0, 0);
    256      1.105   thorpej 		ips = IP_STAT_GETREF();
    257      1.105   thorpej 		ips[IP_STAT_NOPROTO]++;
    258      1.105   thorpej 		ips[IP_STAT_DELIVERED]--;
    259      1.105   thorpej 		IP_STAT_PUTREF();
    260  1.116.2.1     rmind 	} else {
    261       1.97    dyoung 		m_freem(m);
    262  1.116.2.1     rmind 	}
    263       1.60    itojun }
    264       1.60    itojun 
    265  1.116.2.1     rmind static int
    266  1.116.2.1     rmind rip_pcbnotify(inpcb_t *inp, void *arg)
    267  1.116.2.1     rmind {
    268  1.116.2.1     rmind 	struct rip_ctlinput_ctx *rctx = arg;
    269  1.116.2.1     rmind 	const struct ip *ip = rctx->ip;
    270  1.116.2.1     rmind 	struct ip *inp_ip = in_getiphdr(inp);
    271  1.116.2.1     rmind 	struct in_addr laddr, faddr;
    272  1.116.2.1     rmind 
    273  1.116.2.1     rmind 	if (inp_ip->ip_p && inp_ip->ip_p != ip->ip_p) {
    274  1.116.2.1     rmind 		return 0;
    275       1.60    itojun 	}
    276  1.116.2.1     rmind 	inpcb_get_addrs(inp, &laddr, &faddr);
    277       1.60    itojun 
    278  1.116.2.1     rmind 	if (in_hosteq(faddr, rctx->addr) && in_hosteq(laddr, ip->ip_src)) {
    279  1.116.2.1     rmind 		inpcb_rtchange(inp, rctx->errno);
    280  1.116.2.1     rmind 	}
    281  1.116.2.1     rmind 	return 0;
    282       1.60    itojun }
    283       1.60    itojun 
    284       1.60    itojun void *
    285       1.95    dyoung rip_ctlinput(int cmd, const struct sockaddr *sa, void *v)
    286       1.60    itojun {
    287       1.60    itojun 	struct ip *ip = v;
    288       1.60    itojun 	int errno;
    289       1.60    itojun 
    290       1.60    itojun 	if (sa->sa_family != AF_INET ||
    291       1.60    itojun 	    sa->sa_len != sizeof(struct sockaddr_in))
    292       1.60    itojun 		return NULL;
    293       1.60    itojun 	if ((unsigned)cmd >= PRC_NCMDS)
    294       1.60    itojun 		return NULL;
    295       1.60    itojun 	errno = inetctlerrmap[cmd];
    296  1.116.2.1     rmind 
    297  1.116.2.1     rmind 	if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD || ip == NULL) {
    298  1.116.2.1     rmind 		inpcb_notifyall(rawcbtable, satocsin(sa)->sin_addr,
    299  1.116.2.1     rmind 		    errno, inpcb_rtchange);
    300       1.60    itojun 		return NULL;
    301  1.116.2.1     rmind 	} else if (errno == 0) {
    302  1.116.2.1     rmind 		return NULL;
    303  1.116.2.1     rmind 	}
    304  1.116.2.1     rmind 
    305  1.116.2.1     rmind 	/* Note: mapped address case. */
    306  1.116.2.1     rmind 	struct rip_ctlinput_ctx rctx = {
    307  1.116.2.1     rmind 		.ip = ip, .addr = satocsin(sa)->sin_addr, .errno = errno
    308  1.116.2.1     rmind 	};
    309  1.116.2.1     rmind 	(void)inpcb_foreach(rawcbtable, AF_INET, rip_pcbnotify, &rctx);
    310  1.116.2.1     rmind 
    311       1.60    itojun 	return NULL;
    312        1.1       cgd }
    313        1.1       cgd 
    314        1.1       cgd /*
    315  1.116.2.1     rmind  * Generate IP header and pass packet to the IP output routine.
    316        1.1       cgd  * Tack on options user may have setup with control call.
    317        1.1       cgd  */
    318        1.9   mycroft int
    319       1.24  christos rip_output(struct mbuf *m, ...)
    320       1.24  christos {
    321  1.116.2.1     rmind 	inpcb_t *inp;
    322  1.116.2.1     rmind 	struct socket *so;
    323       1.53  augustss 	struct ip *ip;
    324       1.10   mycroft 	struct mbuf *opts;
    325  1.116.2.1     rmind 	int flags, inpflags;
    326       1.24  christos 	va_list ap;
    327       1.24  christos 
    328       1.24  christos 	va_start(ap, m);
    329  1.116.2.1     rmind 	inp = va_arg(ap, inpcb_t *);
    330       1.24  christos 	va_end(ap);
    331       1.24  christos 
    332  1.116.2.1     rmind 	so = inpcb_get_socket(inp);
    333  1.116.2.1     rmind 	KASSERT(solocked(so));
    334  1.116.2.1     rmind 
    335  1.116.2.1     rmind 	flags = (so->so_options & SO_DONTROUTE) |
    336  1.116.2.1     rmind 	    IP_ALLOWBROADCAST | IP_RETURNMTU;
    337  1.116.2.1     rmind 	inpflags = inpcb_get_flags(inp);
    338        1.1       cgd 
    339        1.1       cgd 	/*
    340        1.1       cgd 	 * If the user handed us a complete IP packet, use it.
    341        1.1       cgd 	 * Otherwise, allocate an mbuf for a header and fill it in.
    342        1.1       cgd 	 */
    343  1.116.2.1     rmind 	if ((inpflags & INP_HDRINCL) == 0) {
    344  1.116.2.1     rmind 		struct ip *inp_ip = in_getiphdr(inp);
    345  1.116.2.1     rmind 
    346       1.35   thorpej 		if ((m->m_pkthdr.len + sizeof(struct ip)) > IP_MAXPACKET) {
    347       1.35   thorpej 			m_freem(m);
    348  1.116.2.1     rmind 			return EMSGSIZE;
    349       1.35   thorpej 		}
    350       1.68    itojun 		M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
    351  1.116.2.1     rmind 		if (m == NULL) {
    352  1.116.2.1     rmind 			return ENOBUFS;
    353  1.116.2.1     rmind 		}
    354        1.1       cgd 		ip = mtod(m, struct ip *);
    355        1.1       cgd 		ip->ip_tos = 0;
    356       1.62    itojun 		ip->ip_off = htons(0);
    357  1.116.2.1     rmind 		ip->ip_p = inp_ip->ip_p;
    358       1.62    itojun 		ip->ip_len = htons(m->m_pkthdr.len);
    359  1.116.2.1     rmind 		inpcb_get_addrs(inp, &ip->ip_src, &ip->ip_dst);
    360  1.116.2.1     rmind 
    361        1.1       cgd 		ip->ip_ttl = MAXTTL;
    362  1.116.2.1     rmind 		opts = inpcb_get_options(inp);
    363       1.13   mycroft 	} else {
    364       1.35   thorpej 		if (m->m_pkthdr.len > IP_MAXPACKET) {
    365       1.35   thorpej 			m_freem(m);
    366  1.116.2.1     rmind 			return EMSGSIZE;
    367       1.35   thorpej 		}
    368       1.13   mycroft 		ip = mtod(m, struct ip *);
    369       1.65   thorpej 
    370       1.65   thorpej 		/*
    371  1.116.2.1     rmind 		 * If the mbuf is read-only, we need to allocate a new mbuf
    372  1.116.2.1     rmind 		 * for the header, since we need to modify the header.
    373       1.65   thorpej 		 */
    374       1.65   thorpej 		if (M_READONLY(m)) {
    375  1.116.2.1     rmind 			const int hlen = ip->ip_hl << 2;
    376       1.65   thorpej 
    377       1.65   thorpej 			m = m_copyup(m, hlen, (max_linkhdr + 3) & ~3);
    378  1.116.2.1     rmind 			if (m == NULL) {
    379  1.116.2.1     rmind 				return ENOMEM;	/* XXX */
    380  1.116.2.1     rmind 			}
    381       1.65   thorpej 			ip = mtod(m, struct ip *);
    382       1.65   thorpej 		}
    383       1.65   thorpej 
    384  1.116.2.1     rmind 		/*
    385  1.116.2.1     rmind 		 * Applications on raw sockets pass us packets
    386  1.116.2.1     rmind 		 * in host byte order.
    387  1.116.2.1     rmind 		 */
    388       1.38   mycroft 		if (m->m_pkthdr.len != ip->ip_len) {
    389       1.38   mycroft 			m_freem(m);
    390       1.38   mycroft 			return (EINVAL);
    391       1.38   mycroft 		}
    392       1.62    itojun 		HTONS(ip->ip_len);
    393       1.62    itojun 		HTONS(ip->ip_off);
    394  1.116.2.1     rmind 		if (ip->ip_id || m->m_pkthdr.len < IP_MINFRAGSIZE) {
    395      1.103      matt 			flags |= IP_NOIPNEWID;
    396  1.116.2.1     rmind 		}
    397       1.13   mycroft 		opts = NULL;
    398  1.116.2.1     rmind 
    399  1.116.2.1     rmind 		/*
    400  1.116.2.1     rmind 		 * Note: prevent IP output from overwriting header fields.
    401  1.116.2.1     rmind 		 */
    402       1.13   mycroft 		flags |= IP_RAWOUTPUT;
    403      1.105   thorpej 		IP_STATINC(IP_STAT_RAWOUT);
    404        1.1       cgd 	}
    405  1.116.2.1     rmind 
    406  1.116.2.1     rmind 	return ip_output(m, opts, inpcb_get_route(inp), flags,
    407  1.116.2.1     rmind 	    inpcb_get_moptions(inp), so);
    408        1.1       cgd }
    409        1.1       cgd 
    410        1.1       cgd /*
    411        1.1       cgd  * Raw IP socket option processing.
    412        1.1       cgd  */
    413        1.9   mycroft int
    414      1.108    plunky rip_ctloutput(int op, struct socket *so, struct sockopt *sopt)
    415        1.1       cgd {
    416  1.116.2.1     rmind 	inpcb_t *inp = sotoinpcb(so);
    417  1.116.2.1     rmind 	int inpflags = inpcb_get_flags(inp);
    418  1.116.2.1     rmind 	int error = 0, optval;
    419  1.116.2.1     rmind 
    420  1.116.2.1     rmind 	KASSERT(solocked(so));
    421        1.1       cgd 
    422      1.108    plunky 	if (sopt->sopt_level == SOL_SOCKET && sopt->sopt_name == SO_NOHEADER) {
    423      1.100    dyoung 		if (op == PRCO_GETOPT) {
    424  1.116.2.1     rmind 			optval = (inpflags & INP_NOHEADER) ? 1 : 0;
    425      1.108    plunky 			error = sockopt_set(sopt, &optval, sizeof(optval));
    426      1.108    plunky 		} else if (op == PRCO_SETOPT) {
    427      1.108    plunky 			error = sockopt_getint(sopt, &optval);
    428      1.108    plunky 			if (error)
    429      1.108    plunky 				goto out;
    430      1.108    plunky 			if (optval) {
    431  1.116.2.1     rmind 				inpflags &= ~INP_HDRINCL;
    432  1.116.2.1     rmind 				inpflags |= INP_NOHEADER;
    433      1.108    plunky 			} else
    434  1.116.2.1     rmind 				inpflags &= ~INP_NOHEADER;
    435      1.108    plunky 		}
    436      1.108    plunky 		goto out;
    437  1.116.2.1     rmind 	}
    438  1.116.2.1     rmind 
    439  1.116.2.1     rmind 	if (sopt->sopt_level != IPPROTO_IP) {
    440      1.108    plunky 		return ip_ctloutput(op, so, sopt);
    441  1.116.2.1     rmind 	}
    442      1.100    dyoung 
    443      1.100    dyoung 	switch (op) {
    444       1.31   mycroft 	case PRCO_SETOPT:
    445      1.108    plunky 		switch (sopt->sopt_name) {
    446       1.31   mycroft 		case IP_HDRINCL:
    447      1.108    plunky 			error = sockopt_getint(sopt, &optval);
    448      1.108    plunky 			if (error)
    449      1.108    plunky 				break;
    450      1.108    plunky 			if (optval)
    451  1.116.2.1     rmind 				inpflags |= INP_HDRINCL;
    452      1.100    dyoung 			else
    453  1.116.2.1     rmind 				inpflags &= ~INP_HDRINCL;
    454      1.108    plunky 			break;
    455       1.31   mycroft 
    456       1.31   mycroft #ifdef MROUTING
    457       1.31   mycroft 		case MRT_INIT:
    458       1.31   mycroft 		case MRT_DONE:
    459       1.31   mycroft 		case MRT_ADD_VIF:
    460       1.31   mycroft 		case MRT_DEL_VIF:
    461       1.31   mycroft 		case MRT_ADD_MFC:
    462       1.31   mycroft 		case MRT_DEL_MFC:
    463       1.31   mycroft 		case MRT_ASSERT:
    464       1.81      manu 		case MRT_API_CONFIG:
    465       1.81      manu 		case MRT_ADD_BW_UPCALL:
    466       1.81      manu 		case MRT_DEL_BW_UPCALL:
    467      1.108    plunky 			error = ip_mrouter_set(so, sopt);
    468       1.31   mycroft 			break;
    469       1.31   mycroft #endif
    470       1.31   mycroft 
    471       1.31   mycroft 		default:
    472      1.108    plunky 			error = ip_ctloutput(op, so, sopt);
    473       1.31   mycroft 			break;
    474       1.13   mycroft 		}
    475       1.13   mycroft 		break;
    476        1.1       cgd 
    477       1.31   mycroft 	case PRCO_GETOPT:
    478      1.108    plunky 		switch (sopt->sopt_name) {
    479       1.31   mycroft 		case IP_HDRINCL:
    480  1.116.2.1     rmind 			optval = inpflags & INP_HDRINCL;
    481      1.108    plunky 			error = sockopt_set(sopt, &optval, sizeof(optval));
    482       1.31   mycroft 			break;
    483       1.31   mycroft 
    484        1.6   hpeyerl #ifdef MROUTING
    485       1.31   mycroft 		case MRT_VERSION:
    486       1.31   mycroft 		case MRT_ASSERT:
    487       1.81      manu 		case MRT_API_SUPPORT:
    488       1.81      manu 		case MRT_API_CONFIG:
    489      1.108    plunky 			error = ip_mrouter_get(so, sopt);
    490       1.18   mycroft 			break;
    491       1.31   mycroft #endif
    492       1.31   mycroft 
    493       1.18   mycroft 		default:
    494      1.108    plunky 			error = ip_ctloutput(op, so, sopt);
    495       1.18   mycroft 			break;
    496       1.18   mycroft 		}
    497       1.31   mycroft 		break;
    498        1.1       cgd 	}
    499      1.108    plunky  out:
    500  1.116.2.1     rmind 	if (!error) {
    501  1.116.2.1     rmind 		inpcb_set_flags(inp, inpflags);
    502  1.116.2.1     rmind 	}
    503      1.100    dyoung 	return error;
    504        1.1       cgd }
    505        1.1       cgd 
    506  1.116.2.1     rmind static int
    507  1.116.2.1     rmind rip_bind(inpcb_t *inp, struct mbuf *nam)
    508       1.29   mycroft {
    509       1.29   mycroft 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
    510       1.29   mycroft 
    511       1.29   mycroft 	if (nam->m_len != sizeof(*addr))
    512  1.116.2.1     rmind 		return EINVAL;
    513  1.116.2.1     rmind 	if (!IFNET_FIRST())
    514  1.116.2.1     rmind 		return EADDRNOTAVAIL;
    515      1.115     joerg 	if (addr->sin_family != AF_INET)
    516  1.116.2.1     rmind 		return EAFNOSUPPORT;
    517  1.116.2.1     rmind 	if (!in_nullhost(addr->sin_addr) && !ifa_ifwithaddr(sintosa(addr)))
    518  1.116.2.1     rmind 		return EADDRNOTAVAIL;
    519  1.116.2.1     rmind 
    520  1.116.2.1     rmind 	inpcb_set_addrs(inp, &addr->sin_addr, NULL);
    521  1.116.2.1     rmind 	return 0;
    522       1.29   mycroft }
    523       1.29   mycroft 
    524  1.116.2.1     rmind static int
    525  1.116.2.1     rmind rip_connect(inpcb_t *inp, struct mbuf *nam)
    526       1.27   mycroft {
    527       1.27   mycroft 	struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *);
    528       1.27   mycroft 
    529       1.27   mycroft 	if (nam->m_len != sizeof(*addr))
    530  1.116.2.1     rmind 		return EINVAL;
    531  1.116.2.1     rmind 	if (!IFNET_FIRST())
    532  1.116.2.1     rmind 		return EADDRNOTAVAIL;
    533      1.115     joerg 	if (addr->sin_family != AF_INET)
    534  1.116.2.1     rmind 		return EAFNOSUPPORT;
    535  1.116.2.1     rmind 
    536  1.116.2.1     rmind 	inpcb_set_addrs(inp, NULL, &addr->sin_addr);
    537  1.116.2.1     rmind 	return 0;
    538       1.27   mycroft }
    539       1.27   mycroft 
    540  1.116.2.1     rmind static void
    541  1.116.2.1     rmind rip_disconnect(inpcb_t *inp)
    542       1.27   mycroft {
    543  1.116.2.1     rmind 	inpcb_set_addrs(inp, NULL, &zeroin_addr);
    544       1.27   mycroft }
    545       1.27   mycroft 
    546        1.9   mycroft int
    547  1.116.2.1     rmind rip_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
    548  1.116.2.1     rmind     struct mbuf *control, struct lwp *l)
    549        1.1       cgd {
    550  1.116.2.1     rmind 	inpcb_t *inp;
    551  1.116.2.1     rmind 	struct ip *ip;
    552       1.53  augustss 	int error = 0;
    553       1.13   mycroft #ifdef MROUTING
    554        1.6   hpeyerl 	extern struct socket *ip_mrouter;
    555        1.6   hpeyerl #endif
    556       1.27   mycroft 
    557  1.116.2.1     rmind 	if (req == PRU_CONTROL) {
    558  1.116.2.1     rmind 		return in_control(so, (long)m, nam, (ifnet_t *)control, l);
    559  1.116.2.1     rmind 	}
    560       1.50   thorpej 	if (req == PRU_PURGEIF) {
    561  1.116.2.1     rmind 		int s = splsoftnet();
    562      1.107        ad 		mutex_enter(softnet_lock);
    563  1.116.2.1     rmind 		inpcb_purgeif0(rawcbtable, (ifnet_t *)control);
    564  1.116.2.1     rmind 		in_purgeif((ifnet_t *)control);
    565  1.116.2.1     rmind 		inpcb_purgeif(rawcbtable, (ifnet_t *)control);
    566      1.107        ad 		mutex_exit(softnet_lock);
    567       1.93       tls 		splx(s);
    568  1.116.2.1     rmind 		return 0;
    569       1.49   thorpej 	}
    570       1.22        pk 
    571  1.116.2.1     rmind 	KASSERT(req == PRU_ATTACH || solocked(so));
    572       1.27   mycroft 	inp = sotoinpcb(so);
    573  1.116.2.1     rmind 
    574  1.116.2.1     rmind 	KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB));
    575      1.111    dyoung 	if (inp == NULL && req != PRU_ATTACH) {
    576  1.116.2.1     rmind 		return EINVAL;
    577       1.22        pk 	}
    578       1.22        pk 
    579        1.1       cgd 	switch (req) {
    580        1.1       cgd 	case PRU_ATTACH:
    581      1.107        ad 		sosetlock(so);
    582  1.116.2.1     rmind 		if (inp) {
    583       1.27   mycroft 			error = EISCONN;
    584       1.27   mycroft 			break;
    585       1.27   mycroft 		}
    586       1.94      elad 
    587       1.94      elad 		/* XXX: raw socket permissions are checked in socreate() */
    588       1.94      elad 
    589       1.27   mycroft 		if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
    590       1.27   mycroft 			error = soreserve(so, rip_sendspace, rip_recvspace);
    591       1.27   mycroft 			if (error)
    592       1.27   mycroft 				break;
    593       1.27   mycroft 		}
    594  1.116.2.1     rmind 		error = inpcb_create(so, rawcbtable);
    595       1.27   mycroft 		if (error)
    596       1.13   mycroft 			break;
    597       1.27   mycroft 		inp = sotoinpcb(so);
    598  1.116.2.1     rmind 		ip = in_getiphdr(inp);
    599  1.116.2.1     rmind 		ip->ip_p = (long)nam;
    600        1.1       cgd 		break;
    601        1.1       cgd 
    602        1.1       cgd 	case PRU_DETACH:
    603       1.13   mycroft #ifdef MROUTING
    604        1.6   hpeyerl 		if (so == ip_mrouter)
    605        1.6   hpeyerl 			ip_mrouter_done();
    606        1.6   hpeyerl #endif
    607  1.116.2.1     rmind 		inpcb_destroy(inp);
    608        1.1       cgd 		break;
    609        1.1       cgd 
    610        1.1       cgd 	case PRU_BIND:
    611       1.29   mycroft 		error = rip_bind(inp, nam);
    612       1.27   mycroft 		break;
    613       1.27   mycroft 
    614       1.27   mycroft 	case PRU_LISTEN:
    615       1.27   mycroft 		error = EOPNOTSUPP;
    616       1.27   mycroft 		break;
    617       1.27   mycroft 
    618       1.27   mycroft 	case PRU_CONNECT:
    619       1.27   mycroft 		error = rip_connect(inp, nam);
    620       1.27   mycroft 		if (error)
    621       1.13   mycroft 			break;
    622        1.1       cgd 		soisconnected(so);
    623       1.13   mycroft 		break;
    624       1.13   mycroft 
    625       1.13   mycroft 	case PRU_CONNECT2:
    626       1.13   mycroft 		error = EOPNOTSUPP;
    627       1.13   mycroft 		break;
    628       1.13   mycroft 
    629       1.27   mycroft 	case PRU_DISCONNECT:
    630       1.27   mycroft 		soisdisconnected(so);
    631       1.27   mycroft 		rip_disconnect(inp);
    632       1.27   mycroft 		break;
    633       1.27   mycroft 
    634       1.13   mycroft 	/*
    635       1.13   mycroft 	 * Mark the connection as being incapable of further input.
    636       1.13   mycroft 	 */
    637       1.13   mycroft 	case PRU_SHUTDOWN:
    638       1.13   mycroft 		socantsendmore(so);
    639       1.13   mycroft 		break;
    640       1.13   mycroft 
    641       1.27   mycroft 	case PRU_RCVD:
    642       1.27   mycroft 		error = EOPNOTSUPP;
    643       1.27   mycroft 		break;
    644       1.27   mycroft 
    645       1.13   mycroft 	/*
    646       1.13   mycroft 	 * Ship a packet out.  The appropriate raw output
    647       1.13   mycroft 	 * routine handles any massaging necessary.
    648       1.13   mycroft 	 */
    649       1.13   mycroft 	case PRU_SEND:
    650       1.28   mycroft 		if (control && control->m_len) {
    651       1.28   mycroft 			m_freem(control);
    652       1.28   mycroft 			m_freem(m);
    653       1.28   mycroft 			error = EINVAL;
    654       1.28   mycroft 			break;
    655       1.28   mycroft 		}
    656  1.116.2.1     rmind 		if ((so->so_state & SS_ISCONNECTED) != 0) {
    657  1.116.2.1     rmind 			error = nam ? EISCONN : ENOTCONN;
    658  1.116.2.1     rmind 			m_freem(m);
    659  1.116.2.1     rmind 			break;
    660  1.116.2.1     rmind 		}
    661  1.116.2.1     rmind 		if (nam && (error = rip_connect(inp, nam)) != 0) {
    662  1.116.2.1     rmind 			m_freem(m);
    663  1.116.2.1     rmind 			break;
    664       1.13   mycroft 		}
    665       1.27   mycroft 		error = rip_output(m, inp);
    666  1.116.2.1     rmind 		if (nam) {
    667       1.27   mycroft 			rip_disconnect(inp);
    668  1.116.2.1     rmind 		}
    669       1.13   mycroft 		break;
    670       1.13   mycroft 
    671       1.13   mycroft 	case PRU_SENSE:
    672       1.13   mycroft 		/*
    673  1.116.2.1     rmind 		 * Stat: do not bother with a blocksize.
    674       1.13   mycroft 		 */
    675  1.116.2.1     rmind 		return 0;
    676       1.13   mycroft 
    677       1.13   mycroft 	case PRU_RCVOOB:
    678       1.27   mycroft 		error = EOPNOTSUPP;
    679       1.27   mycroft 		break;
    680       1.27   mycroft 
    681       1.13   mycroft 	case PRU_SENDOOB:
    682       1.28   mycroft 		m_freem(control);
    683       1.27   mycroft 		m_freem(m);
    684       1.13   mycroft 		error = EOPNOTSUPP;
    685       1.13   mycroft 		break;
    686       1.13   mycroft 
    687       1.13   mycroft 	case PRU_SOCKADDR:
    688  1.116.2.1     rmind 		inpcb_fetch_sockaddr(inp, nam);
    689       1.13   mycroft 		break;
    690       1.13   mycroft 
    691       1.13   mycroft 	case PRU_PEERADDR:
    692  1.116.2.1     rmind 		inpcb_fetch_peeraddr(inp, nam);
    693       1.13   mycroft 		break;
    694       1.13   mycroft 
    695       1.13   mycroft 	default:
    696  1.116.2.1     rmind 		KASSERT(false);
    697        1.1       cgd 	}
    698       1.27   mycroft 
    699  1.116.2.1     rmind 	return error;
    700        1.1       cgd }
    701       1.84    atatat 
    702      1.110     pooka static void
    703      1.110     pooka sysctl_net_inet_raw_setup(struct sysctllog **clog)
    704       1.84    atatat {
                             	/*
                             	 * Register the sysctl nodes for raw IPv4 sockets:
                             	 * "net", "inet", "raw" and, beneath them, "pcblist".
                             	 * Every node is created with CTLFLAG_PERMANENT and every
                             	 * call is handed the caller-supplied log pointer "clog".
                             	 * The return values of sysctl_createv() are not checked
                             	 * here, so a failed registration goes unreported.
                             	 */

                             	/* "net" node, addressed as CTL_NET. */
    705       1.84    atatat 	sysctl_createv(clog, 0, NULL, NULL,
    706       1.84    atatat 		       CTLFLAG_PERMANENT,
    707       1.84    atatat 		       CTLTYPE_NODE, "net", NULL,
    708       1.84    atatat 		       NULL, 0, NULL, 0,
    709       1.84    atatat 		       CTL_NET, CTL_EOL);
                             	/* "inet" node, addressed as CTL_NET.PF_INET. */
    710       1.84    atatat 	sysctl_createv(clog, 0, NULL, NULL,
    711       1.84    atatat 		       CTLFLAG_PERMANENT,
    712       1.84    atatat 		       CTLTYPE_NODE, "inet", NULL,
    713       1.84    atatat 		       NULL, 0, NULL, 0,
    714       1.84    atatat 		       CTL_NET, PF_INET, CTL_EOL);
                             	/* "raw" node, addressed as CTL_NET.PF_INET.IPPROTO_RAW. */
    715       1.84    atatat 	sysctl_createv(clog, 0, NULL, NULL,
    716       1.84    atatat 		       CTLFLAG_PERMANENT,
    717       1.84    atatat 		       CTLTYPE_NODE, "raw",
    718       1.84    atatat 		       SYSCTL_DESCR("Raw IPv4 settings"),
    719       1.84    atatat 		       NULL, 0, NULL, 0,
    720       1.84    atatat 		       CTL_NET, PF_INET, IPPROTO_RAW, CTL_EOL);
                             	/*
                             	 * "pcblist": a CTLTYPE_STRUCT node under the "raw" node,
                             	 * with sysctl_inpcblist as its handler and rawcbtable
                             	 * passed along as the node's data argument.  The final
                             	 * name component is CTL_CREATE rather than a fixed
                             	 * number; NOTE(review): per sysctl(9) this should request
                             	 * a dynamically assigned node number -- confirm.
                             	 */
    721       1.84    atatat 	sysctl_createv(clog, 0, NULL, NULL,
    722       1.84    atatat 		       CTLFLAG_PERMANENT,
    723       1.86    atatat 		       CTLTYPE_STRUCT, "pcblist",
    724       1.84    atatat 		       SYSCTL_DESCR("Raw IPv4 control block list"),
    725  1.116.2.1     rmind 		       sysctl_inpcblist, 0, rawcbtable, 0,
    726       1.84    atatat 		       CTL_NET, PF_INET, IPPROTO_RAW,
    727       1.84    atatat 		       CTL_CREATE, CTL_EOL);
    728       1.84    atatat }
    729