udp_usrreq.c revision 1.190.2.4 1 /* $NetBSD: udp_usrreq.c,v 1.190.2.4 2013/10/17 23:52:18 rmind Exp $ */
2
3 /*
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*
33 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
34 * The Regents of the University of California. All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 * 3. Neither the name of the University nor the names of its contributors
45 * may be used to endorse or promote products derived from this software
46 * without specific prior written permission.
47 *
48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
58 * SUCH DAMAGE.
59 *
60 * @(#)udp_usrreq.c 8.6 (Berkeley) 5/23/95
61 */
62
63 /*
64 * UDP protocol implementation.
65 * Per RFC 768, August, 1980.
66 */
67
68 #include <sys/cdefs.h>
69 __KERNEL_RCSID(0, "$NetBSD: udp_usrreq.c,v 1.190.2.4 2013/10/17 23:52:18 rmind Exp $");
70
71 #include "opt_inet.h"
72 #include "opt_compat_netbsd.h"
73 #include "opt_ipsec.h"
74 #include "opt_inet_csum.h"
75 #include "opt_ipkdb.h"
76 #include "opt_mbuftrace.h"
77
78 #include <sys/param.h>
79 #include <sys/mbuf.h>
80 #include <sys/protosw.h>
81 #include <sys/socket.h>
82 #include <sys/socketvar.h>
83 #include <sys/systm.h>
84 #include <sys/kmem.h>
85 #include <sys/domain.h>
86 #include <sys/sysctl.h>
87
88 #include <net/if.h>
89 #include <net/route.h>
90
91 #include <netinet/in.h>
92 #include <netinet/in_systm.h>
93 #include <netinet/in_var.h>
94 #include <netinet/ip.h>
95 #include <netinet/in_pcb.h>
96 #include <netinet/ip_var.h>
97 #include <netinet/ip_icmp.h>
98 #include <netinet/udp.h>
99 #include <netinet/udp_var.h>
100 #include <netinet/udp_private.h>
101
102 #ifdef INET6
103 #include <netinet/ip6.h>
104 #include <netinet/icmp6.h>
105 #include <netinet6/ip6_var.h>
106 #include <netinet6/ip6_private.h>
107 #include <netinet6/in6_pcb.h>
108 #include <netinet6/udp6_var.h>
109 #include <netinet6/udp6_private.h>
110 #endif
111
112 #ifndef INET6
113 /* always need ip6.h for IP6_EXTHDR_GET */
114 #include <netinet/ip6.h>
115 #endif
116
117 #ifdef IPSEC
118 #include <netipsec/ipsec.h>
119 #include <netipsec/ipsec_var.h>
120 #include <netipsec/ipsec_private.h>
121 #include <netipsec/esp.h>
122 #ifdef INET6
123 #include <netipsec/ipsec6.h>
124 #endif
125 #endif /* IPSEC */
126
127 #ifdef COMPAT_50
128 #include <compat/sys/socket.h>
129 #endif
130
131 #ifdef IPKDB
132 #include <ipkdb/ipkdb.h>
133 #endif
134
/* Non-zero: udp_output() computes a checksum for outgoing datagrams. */
int	udpcksum = 1;
/* Non-zero: verify received checksums in software even on loopback. */
int	udp_do_loopback_cksum = 0;

/* PCB table used for all UDP lookups in this file; set up by udp_init(). */
inpcbtable_t *	udbtable __read_mostly;
/* Per-CPU block of UDP_NSTATS 64-bit counters; allocated by udp_init(). */
percpu_t *	udpstat_percpu;
140
141 #ifdef INET
142 #ifdef IPSEC
143 static int udp4_espinudp (struct mbuf **, int, struct sockaddr *,
144 struct socket *);
145 #endif
146 static void udp4_sendup (struct mbuf *, int, struct sockaddr *,
147 struct socket *);
148 static int udp4_realinput (struct sockaddr_in *, struct sockaddr_in *,
149 struct mbuf **, int);
150 static int udp4_input_checksum(struct mbuf *, const struct udphdr *, int, int);
151 #endif
152
/* Default PCB hash size; a build may predefine UDBHASHSIZE to override it. */
#ifndef UDBHASHSIZE
#define	UDBHASHSIZE	128
#endif
/* Passed (twice) to inpcb_init() when the UDP PCB table is created. */
int	udbhashsize = UDBHASHSIZE;

/*
 * Default send/receive buffer reservations, applied by udp_attach() when
 * the socket has no high-water marks yet.  Both are writable via sysctl.
 */
static int	udp_sendspace = 9216;
static int	udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
160
#ifdef MBUFTRACE
/*
 * mbuf owner tags for UDP: one generic, one for the receive side and one
 * for the transmit side.  They are attached in udp_init() and assigned to
 * sockets in udp_attach().
 */
struct mowner udp_mowner = MOWNER_INIT("udp", "");
struct mowner udp_rx_mowner = MOWNER_INIT("udp", "rx");
struct mowner udp_tx_mowner = MOWNER_INIT("udp", "tx");
#endif
166
#ifdef UDP_CSUM_COUNTERS
#include <sys/device.h>

#if defined(INET)
/*
 * Event counters bumped by udp4_input_checksum(), one per way a received
 * checksum was handled: flagged bad by hardware, verified by hardware,
 * raw hardware sum supplied, or computed in software.
 */
struct evcnt udp_hwcsum_bad = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum bad");
struct evcnt udp_hwcsum_ok = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum ok");
struct evcnt udp_hwcsum_data = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "hwcsum data");
struct evcnt udp_swcsum = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "udp", "swcsum");

EVCNT_ATTACH_STATIC(udp_hwcsum_bad);
EVCNT_ATTACH_STATIC(udp_hwcsum_ok);
EVCNT_ATTACH_STATIC(udp_hwcsum_data);
EVCNT_ATTACH_STATIC(udp_swcsum);
#endif /* defined(INET) */

/* Increment the given event counter (argument is a struct evcnt pointer). */
#define	UDP_CSUM_COUNTER_INCR(ev)	(ev)->ev_count++
#else
/* Counters disabled: the increment expands to nothing. */
#define	UDP_CSUM_COUNTER_INCR(ev)	/* nothing */
#endif /* UDP_CSUM_COUNTERS */
190
191 static void sysctl_net_inet_udp_setup(struct sysctllog **);
192
193 void
194 udp_init(void)
195 {
196 udbtable = inpcb_init(udbhashsize, udbhashsize, 0);
197 sysctl_net_inet_udp_setup(NULL);
198
199 MOWNER_ATTACH(&udp_tx_mowner);
200 MOWNER_ATTACH(&udp_rx_mowner);
201 MOWNER_ATTACH(&udp_mowner);
202
203 #ifdef INET
204 udpstat_percpu = percpu_alloc(sizeof(uint64_t) * UDP_NSTATS);
205 #endif
206 }
207
208 /*
209 * Checksum extended UDP header and data.
210 */
211
212 int
213 udp_input_checksum(int af, struct mbuf *m, const struct udphdr *uh,
214 int iphlen, int len)
215 {
216 switch (af) {
217 #ifdef INET
218 case AF_INET:
219 return udp4_input_checksum(m, uh, iphlen, len);
220 #endif
221 #ifdef INET6
222 case AF_INET6:
223 return udp6_input_checksum(m, uh, iphlen, len);
224 #endif
225 default:
226 KASSERT(false);
227 }
228 return -1;
229 }
230
231 #ifdef INET
232
233 /*
234 * Checksum extended UDP header and data.
235 */
236
237 static int
238 udp4_input_checksum(struct mbuf *m, const struct udphdr *uh,
239 int iphlen, int len)
240 {
241
242 /*
243 * XXX it's better to record and check if this mbuf is
244 * already checked.
245 */
246
247 if (uh->uh_sum == 0)
248 return 0;
249
250 switch (m->m_pkthdr.csum_flags &
251 ((m->m_pkthdr.rcvif->if_csum_flags_rx & M_CSUM_UDPv4) |
252 M_CSUM_TCP_UDP_BAD | M_CSUM_DATA)) {
253 case M_CSUM_UDPv4|M_CSUM_TCP_UDP_BAD:
254 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_bad);
255 goto badcsum;
256
257 case M_CSUM_UDPv4|M_CSUM_DATA: {
258 u_int32_t hw_csum = m->m_pkthdr.csum_data;
259
260 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_data);
261 if (m->m_pkthdr.csum_flags & M_CSUM_NO_PSEUDOHDR) {
262 const struct ip *ip =
263 mtod(m, const struct ip *);
264
265 hw_csum = in_cksum_phdr(ip->ip_src.s_addr,
266 ip->ip_dst.s_addr,
267 htons(hw_csum + len + IPPROTO_UDP));
268 }
269 if ((hw_csum ^ 0xffff) != 0)
270 goto badcsum;
271 break;
272 }
273
274 case M_CSUM_UDPv4:
275 /* Checksum was okay. */
276 UDP_CSUM_COUNTER_INCR(&udp_hwcsum_ok);
277 break;
278
279 default:
280 /*
281 * Need to compute it ourselves. Maybe skip checksum
282 * on loopback interfaces.
283 */
284 if (__predict_true(!(m->m_pkthdr.rcvif->if_flags &
285 IFF_LOOPBACK) ||
286 udp_do_loopback_cksum)) {
287 UDP_CSUM_COUNTER_INCR(&udp_swcsum);
288 if (in4_cksum(m, IPPROTO_UDP, iphlen, len) != 0)
289 goto badcsum;
290 }
291 break;
292 }
293
294 return 0;
295
296 badcsum:
297 UDP_STATINC(UDP_STAT_BADSUM);
298 return -1;
299 }
300
301 void
302 udp_input(struct mbuf *m, ...)
303 {
304 va_list ap;
305 struct sockaddr_in src, dst;
306 struct ip *ip;
307 struct udphdr *uh;
308 int iphlen, len, n;
309 uint16_t ip_len;
310
311 va_start(ap, m);
312 iphlen = va_arg(ap, int);
313 (void)va_arg(ap, int); /* ignore value, advance ap */
314 va_end(ap);
315
316 MCLAIM(m, &udp_rx_mowner);
317 UDP_STATINC(UDP_STAT_IPACKETS);
318
319 /*
320 * Get IP and UDP header together in first mbuf.
321 */
322 ip = mtod(m, struct ip *);
323 IP6_EXTHDR_GET(uh, struct udphdr *, m, iphlen, sizeof(struct udphdr));
324 if (uh == NULL) {
325 UDP_STATINC(UDP_STAT_HDROPS);
326 return;
327 }
328 KASSERT(UDP_HDR_ALIGNED_P(uh));
329
330 /* Destination port of 0 is illegal, based on RFC 768. */
331 if (uh->uh_dport == 0)
332 goto bad;
333
334 /*
335 * Make mbuf data length reflect UDP length.
336 * If not enough data to reflect UDP length, drop.
337 */
338 ip_len = ntohs(ip->ip_len);
339 len = ntohs((uint16_t)uh->uh_ulen);
340 if (ip_len != iphlen + len) {
341 if (ip_len < iphlen + len || len < sizeof(struct udphdr)) {
342 UDP_STATINC(UDP_STAT_BADLEN);
343 goto bad;
344 }
345 m_adj(m, iphlen + len - ip_len);
346 }
347
348 /*
349 * Checksum extended UDP header and data.
350 */
351 if (udp4_input_checksum(m, uh, iphlen, len))
352 goto bad;
353
354 /* construct source and dst sockaddrs. */
355 sockaddr_in_init(&src, &ip->ip_src, uh->uh_sport);
356 sockaddr_in_init(&dst, &ip->ip_dst, uh->uh_dport);
357
358 if ((n = udp4_realinput(&src, &dst, &m, iphlen)) == -1) {
359 UDP_STATINC(UDP_STAT_HDROPS);
360 return;
361 }
362 if (m == NULL) {
363 /*
364 * packet has been processed by ESP stuff -
365 * e.g. dropped NAT-T-keep-alive-packet ...
366 */
367 return;
368 }
369 ip = mtod(m, struct ip *);
370 #ifdef INET6
371 if (IN_MULTICAST(ip->ip_dst.s_addr) || n == 0) {
372 struct sockaddr_in6 src6, dst6;
373
374 memset(&src6, 0, sizeof(src6));
375 src6.sin6_family = AF_INET6;
376 src6.sin6_len = sizeof(struct sockaddr_in6);
377 src6.sin6_addr.s6_addr[10] = src6.sin6_addr.s6_addr[11] = 0xff;
378 memcpy(&src6.sin6_addr.s6_addr[12], &ip->ip_src,
379 sizeof(ip->ip_src));
380 src6.sin6_port = uh->uh_sport;
381 memset(&dst6, 0, sizeof(dst6));
382 dst6.sin6_family = AF_INET6;
383 dst6.sin6_len = sizeof(struct sockaddr_in6);
384 dst6.sin6_addr.s6_addr[10] = dst6.sin6_addr.s6_addr[11] = 0xff;
385 memcpy(&dst6.sin6_addr.s6_addr[12], &ip->ip_dst,
386 sizeof(ip->ip_dst));
387 dst6.sin6_port = uh->uh_dport;
388
389 n += udp6_realinput(AF_INET, &src6, &dst6, m, iphlen);
390 }
391 #endif
392
393 if (n == 0) {
394 if (m->m_flags & (M_BCAST | M_MCAST)) {
395 UDP_STATINC(UDP_STAT_NOPORTBCAST);
396 goto bad;
397 }
398 UDP_STATINC(UDP_STAT_NOPORT);
399 #ifdef IPKDB
400 if (checkipkdb(&ip->ip_src, uh->uh_sport, uh->uh_dport,
401 m, iphlen + sizeof(struct udphdr),
402 m->m_pkthdr.len - iphlen - sizeof(struct udphdr))) {
403 /*
404 * It was a debugger connect packet,
405 * just drop it now
406 */
407 goto bad;
408 }
409 #endif
410 icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
411 m = NULL;
412 }
413 bad:
414 if (m) {
415 m_freem(m);
416 }
417 return;
418 }
419
420 static void
421 udp4_sendup(struct mbuf *m, int off /* offset of data portion */,
422 struct sockaddr *src, struct socket *so)
423 {
424 struct mbuf *opts = NULL;
425 struct mbuf *n;
426 inpcb_t *inp = NULL;
427
428 if (!so)
429 return;
430 switch (so->so_proto->pr_domain->dom_family) {
431 case AF_INET:
432 inp = sotoinpcb(so);
433 break;
434 #ifdef INET6
435 case AF_INET6:
436 break;
437 #endif
438 default:
439 return;
440 }
441
442 #if defined(IPSEC)
443 /* check AH/ESP integrity. */
444 if (so != NULL && ipsec4_in_reject_so(m, so)) {
445 IPSEC_STATINC(IPSEC_STAT_IN_POLVIO);
446 if ((n = m_copypacket(m, M_DONTWAIT)) != NULL)
447 icmp_error(n, ICMP_UNREACH, ICMP_UNREACH_ADMIN_PROHIBIT,
448 0, 0);
449 return;
450 }
451 #endif /*IPSEC*/
452
453 if ((n = m_copypacket(m, M_DONTWAIT)) != NULL) {
454 if (inp && ((inpcb_get_flags(inp) & INP_CONTROLOPTS) != 0
455 #ifdef SO_OTIMESTAMP
456 || so->so_options & SO_OTIMESTAMP
457 #endif
458 || so->so_options & SO_TIMESTAMP)) {
459 struct ip *ip = mtod(n, struct ip *);
460 ip_savecontrol(inp, &opts, ip, n);
461 }
462
463 m_adj(n, off);
464 if (sbappendaddr(&so->so_rcv, src, n,
465 opts) == 0) {
466 m_freem(n);
467 if (opts)
468 m_freem(opts);
469 so->so_rcv.sb_overflowed++;
470 UDP_STATINC(UDP_STAT_FULLSOCK);
471 } else
472 sorwakeup(so);
473 }
474 }
475
/*
 * Context handed to udp4_pcb_process() through inpcb_foreach() when a
 * multicast/broadcast datagram is offered to every PCB.
 */
struct udp_pcb_ctx {
	struct mbuf *		mbuf;	/* the received packet */
	struct sockaddr_in *	src;	/* sender address and port */
	struct sockaddr_in *	dst;	/* destination address and port */
	int			off;	/* offset passed on to udp4_sendup() */
	int			rcvcnt;	/* number of PCBs that matched so far */
};
483
484 static int
485 udp4_pcb_process(inpcb_t *inp, void *arg)
486 {
487 struct udp_pcb_ctx *uctx = arg;
488 struct in_addr dst4 = uctx->dst->sin_addr;
489 in_port_t dport = uctx->dst->sin_port;
490 struct in_addr laddr, faddr;
491 in_port_t lport, fport;
492 struct socket *so;
493
494 inpcb_get_ports(inp, &lport, &fport);
495 if (lport != dport) {
496 return 0;
497 }
498 inpcb_get_addrs(inp, &laddr, &faddr);
499 if (!in_nullhost(laddr) && !in_hosteq(laddr, dst4)) {
500 return 0;
501 }
502 if (!in_nullhost(faddr)) {
503 struct in_addr src4 = uctx->src->sin_addr;
504 in_port_t sport = uctx->src->sin_port;
505
506 if (!in_hosteq(faddr, src4) || fport != sport) {
507 return 0;
508 }
509 }
510
511 so = inpcb_get_socket(inp);
512 udp4_sendup(uctx->mbuf, uctx->off, (struct sockaddr *)uctx->src, so);
513 uctx->rcvcnt++;
514
515 /*
516 * Do not look for additional matches if this one does not have
517 * either the SO_REUSEPORT or SO_REUSEADDR socket options set.
518 * This heuristic avoids searching through all PCBs in the common
519 * case of a non-shared port. It assumes that an application will
520 * never clear these options after setting them.
521 */
522 if ((so->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0) {
523 return EJUSTRETURN;
524 }
525 return 0;
526 }
527
528 static int
529 udp4_realinput(struct sockaddr_in *src, struct sockaddr_in *dst,
530 struct mbuf **mp, int off /* offset of udphdr */)
531 {
532 in_port_t *sport, *dport;
533 struct in_addr *src4, *dst4;
534 inpcb_t *inp;
535 struct mbuf *m = *mp;
536 int rcvcnt;
537
538 rcvcnt = 0;
539 off += sizeof(struct udphdr); /* now, offset of payload */
540
541 if (src->sin_family != AF_INET || dst->sin_family != AF_INET)
542 goto bad;
543
544 src4 = &src->sin_addr;
545 sport = &src->sin_port;
546 dst4 = &dst->sin_addr;
547 dport = &dst->sin_port;
548
549 if (IN_MULTICAST(dst4->s_addr) ||
550 in_broadcast(*dst4, m->m_pkthdr.rcvif)) {
551 struct udp_pcb_ctx uctx = {
552 .mbuf = m, .src = src, .dst = dst,
553 .off = off, .rcvcnt = 0
554 };
555 int error;
556
557 /*
558 * Deliver a multicast or broadcast datagram to *all* sockets
559 * for which the local and remote addresses and ports match
560 * those of the incoming datagram. This allows more than
561 * one process to receive multi/broadcasts on the same port.
562 */
563 error = inpcb_foreach(udbtable, AF_INET,
564 udp4_pcb_process, &uctx);
565 KASSERT(error == 0 || error == EJUSTRETURN);
566 rcvcnt = uctx.rcvcnt;
567 } else {
568 /*
569 * Locate PCB for datagram.
570 */
571 struct socket *so;
572
573 inp = inpcb_lookup(udbtable, *src4, *sport, *dst4, *dport, NULL);
574 if (inp == NULL) {
575 UDP_STATINC(UDP_STAT_PCBHASHMISS);
576 inp = inpcb_lookup_bound(udbtable, *dst4, *dport);
577 if (inp == NULL)
578 return rcvcnt;
579 }
580 so = inpcb_get_socket(inp);
581
582 #ifdef IPSEC
583 /* Handle ESP over UDP */
584 if (inpcb_get_flags(inp) & INP_ESPINUDP_ALL) {
585 struct sockaddr *sa = (struct sockaddr *)src;
586
587 switch (udp4_espinudp(mp, off, sa, so)) {
588 case -1: /* Error, m was freeed */
589 rcvcnt = -1;
590 goto bad;
591 break;
592
593 case 1: /* ESP over UDP */
594 rcvcnt++;
595 goto bad;
596 break;
597
598 case 0: /* plain UDP */
599 default: /* Unexpected */
600 /*
601 * Normal UDP processing will take place
602 * m may have changed.
603 */
604 m = *mp;
605 break;
606 }
607 }
608 #endif
609
610 /*
611 * Check the minimum TTL for socket.
612 */
613 if (mtod(m, struct ip *)->ip_ttl < inpcb_get_minttl(inp)) {
614 goto bad;
615 }
616 udp4_sendup(m, off, (struct sockaddr *)src, so);
617 rcvcnt++;
618 }
619
620 bad:
621 return rcvcnt;
622 }
623 #endif
624
625 #ifdef INET
626 /*
627 * Notify a UDP user of an asynchronous error;
628 * just wake up so that he can collect error status.
629 */
630 static void
631 udp_notify(inpcb_t *inp, int errno)
632 {
633 struct socket *so = inpcb_get_socket(inp);
634
635 so->so_error = errno;
636 sorwakeup(so);
637 sowwakeup(so);
638 }
639
640 void *
641 udp_ctlinput(int cmd, const struct sockaddr *sa, void *v)
642 {
643 struct ip *ip = v;
644 struct udphdr *uh;
645 int errno;
646 bool rdr;
647
648 if (sa->sa_family != AF_INET ||
649 sa->sa_len != sizeof(struct sockaddr_in))
650 return NULL;
651 if ((unsigned)cmd >= PRC_NCMDS)
652 return NULL;
653 errno = inetctlerrmap[cmd];
654
655 rdr = PRC_IS_REDIRECT(cmd);
656 if (rdr || cmd == PRC_HOSTDEAD || ip == NULL) {
657 inpcb_notifyall(udbtable, satocsin(sa)->sin_addr,
658 errno, rdr ? inpcb_rtchange : udp_notify);
659 return NULL;
660 } else if (errno == 0) {
661 return NULL;
662 }
663
664 /* Note: mapped address case */
665 uh = (struct udphdr *)((char *)ip + (ip->ip_hl << 2));
666 inpcb_notify(udbtable, satocsin(sa)->sin_addr, uh->uh_dport,
667 ip->ip_src, uh->uh_sport, errno, udp_notify);
668 return NULL;
669 }
670
671 int
672 udp_ctloutput(int op, struct socket *so, struct sockopt *sopt)
673 {
674 int family, optval, inpflags, error = 0;
675 inpcb_t *inp;
676
677 KASSERT(solocked(so));
678
679 family = so->so_proto->pr_domain->dom_family;
680
681 switch (family) {
682 #ifdef INET
683 case PF_INET:
684 if (sopt->sopt_level != IPPROTO_UDP) {
685 return ip_ctloutput(op, so, sopt);
686 }
687 break;
688 #endif
689 #ifdef INET6
690 case PF_INET6:
691 if (sopt->sopt_level != IPPROTO_UDP) {
692 return ip6_ctloutput(op, so, sopt);
693 }
694 break;
695 #endif
696 default:
697 return EAFNOSUPPORT;
698 }
699
700 switch (op) {
701 case PRCO_SETOPT:
702 inp = sotoinpcb(so);
703
704 switch (sopt->sopt_name) {
705 case UDP_ENCAP:
706 error = sockopt_getint(sopt, &optval);
707 if (error)
708 break;
709
710 inpflags = inpcb_get_flags(inp);
711 switch(optval) {
712 case 0:
713 inpflags &= ~INP_ESPINUDP_ALL;
714 break;
715
716 case UDP_ENCAP_ESPINUDP:
717 inpflags &= ~INP_ESPINUDP_ALL;
718 inpflags |= INP_ESPINUDP;
719 break;
720
721 case UDP_ENCAP_ESPINUDP_NON_IKE:
722 inpflags &= ~INP_ESPINUDP_ALL;
723 inpflags |= INP_ESPINUDP_NON_IKE;
724 break;
725 default:
726 error = EINVAL;
727 break;
728 }
729 inpcb_set_flags(inp, inpflags);
730 break;
731
732 default:
733 error = ENOPROTOOPT;
734 break;
735 }
736 break;
737
738 default:
739 error = EINVAL;
740 break;
741 }
742
743 return error;
744 }
745
746 static int
747 udp_output(struct mbuf *m, inpcb_t *inp)
748 {
749 struct socket *so;
750 struct udpiphdr *ui;
751 struct route *ro;
752 int len = m->m_pkthdr.len;
753 int error = 0;
754
755 MCLAIM(m, &udp_tx_mowner);
756 so = inpcb_get_socket(inp);
757 KASSERT(solocked(so));
758
759 /*
760 * Calculate data length and get a mbuf for UDP and IP headers.
761 */
762 M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT);
763 if (m == NULL) {
764 error = ENOBUFS;
765 goto release;
766 }
767
768 /*
769 * Compute the packet length of the IP header, and
770 * punt if the length looks bogus.
771 */
772 if (len + sizeof(struct udpiphdr) > IP_MAXPACKET) {
773 error = EMSGSIZE;
774 goto release;
775 }
776
777 /*
778 * Fill in mbuf with extended UDP header.
779 */
780 ui = mtod(m, struct udpiphdr *);
781 ui->ui_pr = IPPROTO_UDP;
782 inpcb_get_addrs(inp, &ui->ui_src, &ui->ui_dst);
783 inpcb_get_ports(inp, &ui->ui_sport, &ui->ui_dport);
784 ui->ui_ulen = htons((uint16_t)len + sizeof(struct udphdr));
785
786 /*
787 * Set up checksum and output datagram.
788 */
789 if (udpcksum) {
790 /*
791 * XXX Cache pseudo-header checksum part for
792 * XXX "connected" UDP sockets.
793 */
794 ui->ui_sum = in_cksum_phdr(ui->ui_src.s_addr,
795 ui->ui_dst.s_addr, htons((u_int16_t)len +
796 sizeof(struct udphdr) + IPPROTO_UDP));
797 m->m_pkthdr.csum_flags = M_CSUM_UDPv4;
798 m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum);
799 } else
800 ui->ui_sum = 0;
801
802 struct ip *ui_ip = (struct ip *)ui;
803 ui_ip->ip_len = htons(sizeof (struct udpiphdr) + len);
804
805 struct ip *inp_ip = in_getiphdr(inp);
806 ui_ip->ip_ttl = inp_ip->ip_ttl; /* XXX */
807 ui_ip->ip_tos = inp_ip->ip_tos; /* XXX */
808 UDP_STATINC(UDP_STAT_OPACKETS);
809
810 ro = inpcb_get_route(inp);
811 return ip_output(m, inpcb_get_options(inp), ro,
812 so->so_options & (SO_DONTROUTE | SO_BROADCAST),
813 inpcb_get_moptions(inp), so);
814
815 release:
816 m_freem(m);
817 return error;
818 }
819
820 static int
821 udp_attach(struct socket *so, int proto)
822 {
823 inpcb_t *inp;
824 struct ip *ip;
825 int error;
826
827 KASSERT(sotoinpcb(so) == NULL);
828
829 /* Assign the lock (must happen if we will error out). */
830 sosetlock(so);
831
832 #ifdef MBUFTRACE
833 so->so_mowner = &udp_mowner;
834 so->so_rcv.sb_mowner = &udp_rx_mowner;
835 so->so_snd.sb_mowner = &udp_tx_mowner;
836 #endif
837 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
838 error = soreserve(so, udp_sendspace, udp_recvspace);
839 if (error) {
840 return error;
841 }
842 }
843
844 error = inpcb_create(so, udbtable);
845 if (error) {
846 return error;
847 }
848 inp = sotoinpcb(so);
849 ip = in_getiphdr(inp);
850 ip->ip_ttl = ip_defttl;
851 return error;
852 }
853
854 static void
855 udp_detach(struct socket *so)
856 {
857 inpcb_t *inp;
858
859 KASSERT(solocked(so));
860 inp = sotoinpcb(so);
861 KASSERT(inp != NULL);
862 inpcb_destroy(inp);
863 }
864
865 static int
866 udp_send(struct socket *so, struct mbuf *m, struct mbuf *nam,
867 struct mbuf *control)
868 {
869 inpcb_t *inp;
870 struct in_addr laddr;
871 int error;
872
873 KASSERT(solocked(so));
874 inp = sotoinpcb(so);
875
876 if (control && control->m_len) {
877 m_freem(control);
878 m_freem(m);
879 return EINVAL;
880 }
881
882 if ((so->so_state & SS_ISCONNECTED) != 0) {
883 m_freem(m);
884 return nam ? EISCONN : ENOTCONN;
885 }
886
887 if (nam) {
888 /*
889 * XXX: sendto() case - temporarily connect the socket
890 * to the destination, send and then disconnect. Also,
891 * preserve the local address as it may be changed.
892 */
893 inpcb_get_addrs(inp, &laddr, NULL);
894 if ((error = inpcb_connect(inp, nam, curlwp)) != 0) {
895 m_freem(m);
896 return error;
897 }
898 }
899 error = udp_output(m, inp);
900 if (nam) {
901 inpcb_disconnect(inp);
902 inpcb_set_addrs(inp, &laddr, NULL);
903 inpcb_set_state(inp, INP_BOUND);
904 }
905 return error;
906 }
907
908 static int
909 udp_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
910 struct mbuf *control, struct lwp *l)
911 {
912 inpcb_t *inp;
913 int error = 0;
914
915 KASSERT(req != PRU_ATTACH);
916 KASSERT(req != PRU_DETACH);
917
918 if (req == PRU_CONTROL) {
919 KERNEL_LOCK(1, NULL);
920 error = in_control(so, (long)m, nam, (ifnet_t *)control, l);
921 KERNEL_UNLOCK_ONE(NULL);
922 return error;
923 }
924 if (req == PRU_PURGEIF) {
925 KERNEL_LOCK(1, NULL);
926 mutex_enter(softnet_lock);
927 inpcb_purgeif0(udbtable, (ifnet_t *)control);
928 in_purgeif((ifnet_t *)control);
929 inpcb_purgeif(udbtable, (ifnet_t *)control);
930 mutex_exit(softnet_lock);
931 KERNEL_UNLOCK_ONE(NULL);
932 return 0;
933 }
934
935 KASSERT(solocked(so));
936 inp = sotoinpcb(so);
937
938 KASSERT(!control || (req == PRU_SEND || req == PRU_SENDOOB));
939 if (inp == NULL) {
940 return EINVAL;
941 }
942
943 switch (req) {
944 case PRU_BIND:
945 error = inpcb_bind(inp, nam, l);
946 break;
947
948 case PRU_LISTEN:
949 error = EOPNOTSUPP;
950 break;
951
952 case PRU_CONNECT:
953 error = inpcb_connect(inp, nam, l);
954 if (error)
955 break;
956 soisconnected(so);
957 break;
958
959 case PRU_CONNECT2:
960 error = EOPNOTSUPP;
961 break;
962
963 case PRU_DISCONNECT:
964 /*soisdisconnected(so);*/
965 so->so_state &= ~SS_ISCONNECTED; /* XXX */
966 inpcb_disconnect(inp);
967 break;
968
969 case PRU_SHUTDOWN:
970 socantsendmore(so);
971 break;
972
973 case PRU_RCVD:
974 error = EOPNOTSUPP;
975 break;
976
977 case PRU_SEND:
978 error = udp_send(so, m, nam, control);
979 break;
980
981 case PRU_SENSE:
982 /*
983 * stat: don't bother with a blocksize.
984 */
985 return 0;
986
987 case PRU_RCVOOB:
988 error = EOPNOTSUPP;
989 break;
990
991 case PRU_SENDOOB:
992 m_freem(control);
993 m_freem(m);
994 error = EOPNOTSUPP;
995 break;
996
997 case PRU_SOCKADDR:
998 inpcb_fetch_sockaddr(inp, nam);
999 break;
1000
1001 case PRU_PEERADDR:
1002 inpcb_fetch_peeraddr(inp, nam);
1003 break;
1004
1005 default:
1006 panic("udp_usrreq");
1007 }
1008
1009 return error;
1010 }
1011
1012 static int
1013 sysctl_net_inet_udp_stats(SYSCTLFN_ARGS)
1014 {
1015
1016 return (NETSTAT_SYSCTL(udpstat_percpu, UDP_NSTATS));
1017 }
1018
/*
 * sysctl_net_inet_udp_setup: register the UDP sysctl tree.
 *
 * => clog: sysctl log handed to every sysctl_createv() call; udp_init()
 *    passes NULL.
 *
 * Creates the net, net.inet and net.inet.udp nodes and, below them, the
 * checksum, sendspace, recvspace, do_loopback_cksum, pcblist and stats
 * entries.  The return values of sysctl_createv() are not checked.
 */
static void
sysctl_net_inet_udp_setup(struct sysctllog **clog)
{
	/* Parent nodes: net, net.inet, net.inet.udp. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "net", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "inet", NULL,
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "udp",
		       SYSCTL_DESCR("UDPv4 related settings"),
		       NULL, 0, NULL, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, CTL_EOL);

	/* Read-write integer tunables backed directly by file-scope variables. */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "checksum",
		       SYSCTL_DESCR("Compute UDP checksums"),
		       NULL, 0, &udpcksum, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_CHECKSUM,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "sendspace",
		       SYSCTL_DESCR("Default UDP send buffer size"),
		       NULL, 0, &udp_sendspace, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_SENDSPACE,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "recvspace",
		       SYSCTL_DESCR("Default UDP receive buffer size"),
		       NULL, 0, &udp_recvspace, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_RECVSPACE,
		       CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "do_loopback_cksum",
		       SYSCTL_DESCR("Perform UDP checksum on loopback"),
		       NULL, 0, &udp_do_loopback_cksum, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_LOOPBACKCKSUM,
		       CTL_EOL);

	/*
	 * PCB list, served by sysctl_inpcblist.  The current value of
	 * udbtable is passed here, so the table has to exist already.
	 */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "pcblist",
		       SYSCTL_DESCR("UDP protocol control block list"),
		       sysctl_inpcblist, 0, udbtable, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, CTL_CREATE,
		       CTL_EOL);

	/* Statistics, served by sysctl_net_inet_udp_stats(). */
	sysctl_createv(clog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("UDP statistics"),
		       sysctl_net_inet_udp_stats, 0, NULL, 0,
		       CTL_NET, PF_INET, IPPROTO_UDP, UDPCTL_STATS,
		       CTL_EOL);
}
1085 #endif
1086
/*
 * udp_statinc: function wrapper around UDP_STATINC().
 *
 * => stat: index of the counter to bump; asserted to be below UDP_NSTATS.
 */
void
udp_statinc(u_int stat)
{

	KASSERT(stat < UDP_NSTATS);
	UDP_STATINC(stat);
}
1094
1095 #if defined(INET) && defined(IPSEC)
1096 /*
1097 * Returns:
1098 * 1 if the packet was processed
1099 * 0 if normal UDP processing should take place
1100 * -1 if an error occurent and m was freed
1101 */
1102 static int
1103 udp4_espinudp(struct mbuf **mp, int off, struct sockaddr *src,
1104 struct socket *so)
1105 {
1106 size_t len;
1107 void *data;
1108 inpcb_t *inp;
1109 size_t skip = 0;
1110 size_t minlen;
1111 size_t iphdrlen;
1112 struct ip *ip;
1113 struct m_tag *tag;
1114 struct udphdr *udphdr;
1115 in_port_t sport, dport;
1116 struct mbuf *m = *mp;
1117 int inpflags;
1118
1119 /*
1120 * Collapse the mbuf chain if the first mbuf is too short
1121 * The longest case is: UDP + non ESP marker + ESP
1122 */
1123 minlen = off + sizeof(u_int64_t) + sizeof(struct esp);
1124 if (minlen > m->m_pkthdr.len)
1125 minlen = m->m_pkthdr.len;
1126
1127 if (m->m_len < minlen) {
1128 if ((*mp = m_pullup(m, minlen)) == NULL) {
1129 printf("udp4_espinudp: m_pullup failed\n");
1130 return -1;
1131 }
1132 m = *mp;
1133 }
1134
1135 len = m->m_len - off;
1136 data = mtod(m, char *) + off;
1137 inp = sotoinpcb(so);
1138
1139 /* Ignore keepalive packets */
1140 if ((len == 1) && (*(unsigned char *)data == 0xff)) {
1141 m_free(m);
1142 *mp = NULL; /* avoid any further processiong by caller ... */
1143 return 1;
1144 }
1145 inpflags = inpcb_get_flags(inp);
1146
1147 /*
1148 * Check that the payload is long enough to hold
1149 * an ESP header and compute the length of encapsulation
1150 * header to remove
1151 */
1152 if (inpflags & INP_ESPINUDP) {
1153 u_int32_t *st = (u_int32_t *)data;
1154
1155 if ((len <= sizeof(struct esp)) || (*st == 0))
1156 return 0; /* Normal UDP processing */
1157
1158 skip = sizeof(struct udphdr);
1159 }
1160
1161 if (inpflags & INP_ESPINUDP_NON_IKE) {
1162 u_int32_t *st = (u_int32_t *)data;
1163
1164 if ((len <= sizeof(u_int64_t) + sizeof(struct esp))
1165 || ((st[0] | st[1]) != 0))
1166 return 0; /* Normal UDP processing */
1167
1168 skip = sizeof(struct udphdr) + sizeof(u_int64_t);
1169 }
1170
1171 /*
1172 * Get the UDP ports. They are handled in network
1173 * order everywhere in IPSEC_NAT_T code.
1174 */
1175 udphdr = (struct udphdr *)((char *)data - skip);
1176 sport = udphdr->uh_sport;
1177 dport = udphdr->uh_dport;
1178
1179 /*
1180 * Remove the UDP header (and possibly the non ESP marker)
1181 * IP header lendth is iphdrlen
1182 * Before:
1183 * <--- off --->
1184 * +----+------+-----+
1185 * | IP | UDP | ESP |
1186 * +----+------+-----+
1187 * <-skip->
1188 * After:
1189 * +----+-----+
1190 * | IP | ESP |
1191 * +----+-----+
1192 * <-skip->
1193 */
1194 iphdrlen = off - sizeof(struct udphdr);
1195 memmove(mtod(m, char *) + skip, mtod(m, void *), iphdrlen);
1196 m_adj(m, skip);
1197
1198 ip = mtod(m, struct ip *);
1199 ip->ip_len = htons(ntohs(ip->ip_len) - skip);
1200 ip->ip_p = IPPROTO_ESP;
1201
1202 /*
1203 * We have modified the packet - it is now ESP, so we should not
1204 * return to UDP processing ...
1205 *
1206 * Add a PACKET_TAG_IPSEC_NAT_T_PORT tag to remember
1207 * the source UDP port. This is required if we want
1208 * to select the right SPD for multiple hosts behind
1209 * same NAT
1210 */
1211 if ((tag = m_tag_get(PACKET_TAG_IPSEC_NAT_T_PORTS,
1212 sizeof(sport) + sizeof(dport), M_DONTWAIT)) == NULL) {
1213 printf("udp4_espinudp: m_tag_get failed\n");
1214 m_freem(m);
1215 return -1;
1216 }
1217 ((u_int16_t *)(tag + 1))[0] = sport;
1218 ((u_int16_t *)(tag + 1))[1] = dport;
1219 m_tag_prepend(m, tag);
1220
1221 #ifdef IPSEC
1222 ipsec4_common_input(m, iphdrlen, IPPROTO_ESP);
1223 #else
1224 esp4_input(m, iphdrlen);
1225 #endif
1226
1227 /* We handled it, it shouldn't be handled by UDP */
1228 *mp = NULL; /* avoid free by caller ... */
1229 return 1;
1230 }
1231 #endif
1232
/*
 * User-request vector for UDP: attach and detach have dedicated entry
 * points, every other request goes through udp_usrreq().
 */
const struct pr_usrreqs udp_usrreqs = {
	.pr_attach	= udp_attach,
	.pr_detach	= udp_detach,
	.pr_generic	= udp_usrreq,
};
1238