in_pcb.c revision 1.33 1 /* $NetBSD: in_pcb.c,v 1.33 1996/09/15 18:11:06 mycroft Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/protosw.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/time.h>
48 #include <sys/proc.h>
49
50 #include <net/if.h>
51 #include <net/route.h>
52
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip_var.h>
59
/* All-zero IPv4 address, used in this file as the "unspecified" address. */
struct in_addr zeroin_addr;

/*
 * Hash-bucket selectors.  Each macro evaluates to a pointer to one bucket
 * of the corresponding table.  The address and port arguments are passed
 * through ntohl()/ntohs(), summed, and masked with the table's hash mask.
 *
 * The whole expansion is parenthesized so that the result behaves as a
 * single primary expression (e.g. a postfix operator applied to the macro
 * applies to the bucket pointer, not to the array element).
 */
#define	INPCBHASH_BIND(table, laddr, lport) \
	(&(table)->inpt_bindhashtbl[ \
	    (ntohl((laddr).s_addr) + ntohs(lport)) & (table)->inpt_bindhash])
#define	INPCBHASH_CONNECT(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_connecthashtbl[ \
	    ((ntohl((faddr).s_addr) + ntohs(fport)) + \
	     (ntohl((laddr).s_addr) + ntohs(lport))) & \
	    (table)->inpt_connecthash])
69
70 struct inpcb *
71 in_pcblookup_port __P((struct inpcbtable *,
72 struct in_addr, u_int, int));
73
74 void
75 in_pcbinit(table, bindhashsize, connecthashsize)
76 struct inpcbtable *table;
77 int bindhashsize, connecthashsize;
78 {
79
80 CIRCLEQ_INIT(&table->inpt_queue);
81 table->inpt_bindhashtbl =
82 hashinit(bindhashsize, M_PCB, &table->inpt_bindhash);
83 table->inpt_connecthashtbl =
84 hashinit(connecthashsize, M_PCB, &table->inpt_connecthash);
85 table->inpt_lastport = 0;
86 }
87
88 int
89 in_pcballoc(so, v)
90 struct socket *so;
91 void *v;
92 {
93 struct inpcbtable *table = v;
94 register struct inpcb *inp;
95 int s;
96
97 MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_WAITOK);
98 if (inp == NULL)
99 return (ENOBUFS);
100 bzero((caddr_t)inp, sizeof(*inp));
101 inp->inp_table = table;
102 inp->inp_socket = so;
103 so->so_pcb = inp;
104 s = splnet();
105 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
106 in_pcbstate(inp, INP_ATTACHED);
107 splx(s);
108 return (0);
109 }
110
111 int
112 in_pcbbind(v, nam, p)
113 void *v;
114 struct mbuf *nam;
115 struct proc *p;
116 {
117 register struct inpcb *inp = v;
118 register struct socket *so = inp->inp_socket;
119 register struct inpcbtable *table = inp->inp_table;
120 register struct sockaddr_in *sin;
121 u_int16_t lport = 0;
122 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
123 int error;
124
125 if (in_ifaddr.tqh_first == 0)
126 return (EADDRNOTAVAIL);
127 if (inp->inp_lport || !in_nullhost(inp->inp_laddr))
128 return (EINVAL);
129 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
130 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
131 (so->so_options & SO_ACCEPTCONN) == 0))
132 wild = INPLOOKUP_WILDCARD;
133 if (nam == 0)
134 goto noname;
135 sin = mtod(nam, struct sockaddr_in *);
136 if (nam->m_len != sizeof (*sin))
137 return (EINVAL);
138 #ifdef notdef
139 /*
140 * We should check the family, but old programs
141 * incorrectly fail to initialize it.
142 */
143 if (sin->sin_family != AF_INET)
144 return (EAFNOSUPPORT);
145 #endif
146 lport = sin->sin_port;
147 if (IN_MULTICAST(sin->sin_addr.s_addr)) {
148 /*
149 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
150 * allow complete duplication of binding if
151 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
152 * and a multicast address is bound on both
153 * new and duplicated sockets.
154 */
155 if (so->so_options & SO_REUSEADDR)
156 reuseport = SO_REUSEADDR|SO_REUSEPORT;
157 } else if (!in_nullhost(sin->sin_addr)) {
158 sin->sin_port = 0; /* yech... */
159 if (ifa_ifwithaddr(sintosa(sin)) == 0)
160 return (EADDRNOTAVAIL);
161 }
162 if (lport) {
163 struct inpcb *t;
164 #ifndef IPNOPRIVPORTS
165 /* GROSS */
166 if (ntohs(lport) < IPPORT_RESERVED &&
167 (p == 0 || (error = suser(p->p_ucred, &p->p_acflag))))
168 return (EACCES);
169 #endif
170 t = in_pcblookup_port(table, sin->sin_addr, lport, wild);
171 if (t && (reuseport & t->inp_socket->so_options) == 0)
172 return (EADDRINUSE);
173 }
174 inp->inp_laddr = sin->sin_addr;
175 noname:
176 if (lport == 0)
177 do {
178 if (table->inpt_lastport++ < IPPORT_RESERVED ||
179 table->inpt_lastport > IPPORT_USERRESERVED)
180 table->inpt_lastport = IPPORT_RESERVED;
181 lport = htons(table->inpt_lastport);
182 } while (in_pcblookup_port(table, inp->inp_laddr, lport, wild));
183 inp->inp_lport = lport;
184 in_pcbstate(inp, INP_BOUND);
185 return (0);
186 }
187
188 /*
189 * Connect from a socket to a specified address.
190 * Both address and port must be specified in argument sin.
191 * If don't have a local address for this socket yet,
192 * then pick one.
193 */
194 int
195 in_pcbconnect(v, nam)
196 register void *v;
197 struct mbuf *nam;
198 {
199 register struct inpcb *inp = v;
200 struct in_ifaddr *ia;
201 struct sockaddr_in *ifaddr = NULL;
202 register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
203
204 if (nam->m_len != sizeof (*sin))
205 return (EINVAL);
206 if (sin->sin_family != AF_INET)
207 return (EAFNOSUPPORT);
208 if (sin->sin_port == 0)
209 return (EADDRNOTAVAIL);
210 if (in_ifaddr.tqh_first != 0) {
211 /*
212 * If the destination address is INADDR_ANY,
213 * use the primary local address.
214 * If the supplied address is INADDR_BROADCAST,
215 * and the primary interface supports broadcast,
216 * choose the broadcast address for that interface.
217 */
218 if (in_nullhost(sin->sin_addr))
219 sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
220 else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
221 (in_ifaddr.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
222 sin->sin_addr = in_ifaddr.tqh_first->ia_broadaddr.sin_addr;
223 }
224 /*
225 * If we haven't bound which network number to use as ours,
226 * we will use the number of the outgoing interface.
227 * This depends on having done a routing lookup, which
228 * we will probably have to do anyway, so we might
229 * as well do it now. On the other hand if we are
230 * sending to multiple destinations we may have already
231 * done the lookup, so see if we can use the route
232 * from before. In any case, we only
233 * chose a port number once, even if sending to multiple
234 * destinations.
235 */
236 if (in_nullhost(inp->inp_laddr)) {
237 register struct route *ro;
238
239 ia = (struct in_ifaddr *)0;
240 /*
241 * If route is known or can be allocated now,
242 * our src addr is taken from the i/f, else punt.
243 */
244 ro = &inp->inp_route;
245 if (ro->ro_rt &&
246 (!in_hosteq(satosin(&ro->ro_dst)->sin_addr,
247 sin->sin_addr) ||
248 inp->inp_socket->so_options & SO_DONTROUTE)) {
249 RTFREE(ro->ro_rt);
250 ro->ro_rt = (struct rtentry *)0;
251 }
252 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
253 (ro->ro_rt == (struct rtentry *)0 ||
254 ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
255 /* No route yet, so try to acquire one */
256 ro->ro_dst.sa_family = AF_INET;
257 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
258 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
259 rtalloc(ro);
260 }
261 /*
262 * If we found a route, use the address
263 * corresponding to the outgoing interface
264 * unless it is the loopback (in case a route
265 * to our address on another net goes to loopback).
266 */
267 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
268 ia = ifatoia(ro->ro_rt->rt_ifa);
269 if (ia == 0) {
270 u_int16_t fport = sin->sin_port;
271
272 sin->sin_port = 0;
273 ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
274 sin->sin_port = fport;
275 if (ia == 0)
276 ia = in_ifaddr.tqh_first;
277 if (ia == 0)
278 return (EADDRNOTAVAIL);
279 }
280 /*
281 * If the destination address is multicast and an outgoing
282 * interface has been set as a multicast option, use the
283 * address of that interface as our source address.
284 */
285 if (IN_MULTICAST(sin->sin_addr.s_addr) &&
286 inp->inp_moptions != NULL) {
287 struct ip_moptions *imo;
288 struct ifnet *ifp;
289
290 imo = inp->inp_moptions;
291 if (imo->imo_multicast_ifp != NULL) {
292 ifp = imo->imo_multicast_ifp;
293 for (ia = in_ifaddr.tqh_first; ia != 0;
294 ia = ia->ia_list.tqe_next)
295 if (ia->ia_ifp == ifp)
296 break;
297 if (ia == 0)
298 return (EADDRNOTAVAIL);
299 }
300 }
301 ifaddr = satosin(&ia->ia_addr);
302 }
303 if (in_pcblookup_connect(inp->inp_table, sin->sin_addr, sin->sin_port,
304 !in_nullhost(inp->inp_laddr) ? inp->inp_laddr : ifaddr->sin_addr,
305 inp->inp_lport) != 0)
306 return (EADDRINUSE);
307 if (in_nullhost(inp->inp_laddr)) {
308 if (inp->inp_lport == 0)
309 (void)in_pcbbind(inp, (struct mbuf *)0,
310 (struct proc *)0);
311 inp->inp_laddr = ifaddr->sin_addr;
312 }
313 inp->inp_faddr = sin->sin_addr;
314 inp->inp_fport = sin->sin_port;
315 in_pcbstate(inp, INP_CONNECTED);
316 return (0);
317 }
318
319 void
320 in_pcbdisconnect(v)
321 void *v;
322 {
323 struct inpcb *inp = v;
324
325 inp->inp_faddr = zeroin_addr;
326 inp->inp_fport = 0;
327 in_pcbstate(inp, INP_BOUND);
328 if (inp->inp_socket->so_state & SS_NOFDREF)
329 in_pcbdetach(inp);
330 }
331
332 void
333 in_pcbdetach(v)
334 void *v;
335 {
336 struct inpcb *inp = v;
337 struct socket *so = inp->inp_socket;
338 int s;
339
340 so->so_pcb = 0;
341 sofree(so);
342 if (inp->inp_options)
343 (void)m_free(inp->inp_options);
344 if (inp->inp_route.ro_rt)
345 rtfree(inp->inp_route.ro_rt);
346 ip_freemoptions(inp->inp_moptions);
347 s = splnet();
348 in_pcbstate(inp, INP_ATTACHED);
349 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
350 splx(s);
351 FREE(inp, M_PCB);
352 }
353
354 void
355 in_setsockaddr(inp, nam)
356 register struct inpcb *inp;
357 struct mbuf *nam;
358 {
359 register struct sockaddr_in *sin;
360
361 nam->m_len = sizeof (*sin);
362 sin = mtod(nam, struct sockaddr_in *);
363 bzero((caddr_t)sin, sizeof (*sin));
364 sin->sin_family = AF_INET;
365 sin->sin_len = sizeof(*sin);
366 sin->sin_port = inp->inp_lport;
367 sin->sin_addr = inp->inp_laddr;
368 }
369
370 void
371 in_setpeeraddr(inp, nam)
372 struct inpcb *inp;
373 struct mbuf *nam;
374 {
375 register struct sockaddr_in *sin;
376
377 nam->m_len = sizeof (*sin);
378 sin = mtod(nam, struct sockaddr_in *);
379 bzero((caddr_t)sin, sizeof (*sin));
380 sin->sin_family = AF_INET;
381 sin->sin_len = sizeof(*sin);
382 sin->sin_port = inp->inp_fport;
383 sin->sin_addr = inp->inp_faddr;
384 }
385
386 /*
387 * Pass some notification to all connections of a protocol
388 * associated with address dst. The local address and/or port numbers
389 * may be specified to limit the search. The "usual action" will be
390 * taken, depending on the ctlinput cmd. The caller must filter any
391 * cmds that are uninteresting (e.g., no error in the map).
392 * Call the protocol specific routine (if any) to report
393 * any errors for each matching socket.
394 *
395 * Must be called at splsoftnet.
396 */
397 void
398 in_pcbnotify(table, faddr, fport_arg, laddr, lport_arg, errno, notify)
399 struct inpcbtable *table;
400 struct in_addr faddr, laddr;
401 u_int fport_arg, lport_arg;
402 int errno;
403 void (*notify) __P((struct inpcb *, int));
404 {
405 struct inpcbhead *head;
406 register struct inpcb *inp, *ninp;
407 u_int16_t fport = fport_arg, lport = lport_arg;
408
409 if (in_nullhost(faddr) || notify == 0)
410 return;
411
412 head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
413 for (inp = head->lh_first; inp != NULL; inp = ninp) {
414 ninp = inp->inp_hash.le_next;
415 if (in_hosteq(inp->inp_faddr, faddr) &&
416 inp->inp_fport == fport &&
417 inp->inp_lport == lport &&
418 in_hosteq(inp->inp_laddr, laddr))
419 (*notify)(inp, errno);
420 }
421 }
422
423 void
424 in_pcbnotifyall(table, faddr, errno, notify)
425 struct inpcbtable *table;
426 struct in_addr faddr;
427 int errno;
428 void (*notify) __P((struct inpcb *, int));
429 {
430 register struct inpcb *inp, *ninp;
431
432 if (in_nullhost(faddr) || notify == 0)
433 return;
434
435 for (inp = table->inpt_queue.cqh_first;
436 inp != (struct inpcb *)&table->inpt_queue;
437 inp = ninp) {
438 ninp = inp->inp_queue.cqe_next;
439 if (in_hosteq(inp->inp_faddr, faddr))
440 (*notify)(inp, errno);
441 }
442 }
443
444 /*
445 * Check for alternatives when higher level complains
446 * about service problems. For now, invalidate cached
447 * routing information. If the route was created dynamically
448 * (by a redirect), time to try a default gateway again.
449 */
450 void
451 in_losing(inp)
452 struct inpcb *inp;
453 {
454 register struct rtentry *rt;
455 struct rt_addrinfo info;
456
457 if ((rt = inp->inp_route.ro_rt)) {
458 inp->inp_route.ro_rt = 0;
459 bzero((caddr_t)&info, sizeof(info));
460 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
461 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
462 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
463 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
464 if (rt->rt_flags & RTF_DYNAMIC)
465 (void) rtrequest(RTM_DELETE, rt_key(rt),
466 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
467 (struct rtentry **)0);
468 else
469 /*
470 * A new route can be allocated
471 * the next time output is attempted.
472 */
473 rtfree(rt);
474 }
475 }
476
477 /*
478 * After a routing change, flush old routing
479 * and allocate a (hopefully) better one.
480 */
481 void
482 in_rtchange(inp, errno)
483 register struct inpcb *inp;
484 int errno;
485 {
486
487 if (inp->inp_route.ro_rt) {
488 rtfree(inp->inp_route.ro_rt);
489 inp->inp_route.ro_rt = 0;
490 /*
491 * A new route can be allocated the next time
492 * output is attempted.
493 */
494 }
495 /* SHOULD NOTIFY HIGHER-LEVEL PROTOCOLS */
496 }
497
498 struct inpcb *
499 in_pcblookup_port(table, laddr, lport_arg, flags)
500 struct inpcbtable *table;
501 struct in_addr laddr;
502 u_int lport_arg;
503 int flags;
504 {
505 register struct inpcb *inp, *match = 0;
506 int matchwild = 3, wildcard;
507 u_int16_t lport = lport_arg;
508
509 for (inp = table->inpt_queue.cqh_first;
510 inp != (struct inpcb *)&table->inpt_queue;
511 inp = inp->inp_queue.cqe_next) {
512 if (inp->inp_lport != lport)
513 continue;
514 wildcard = 0;
515 if (!in_nullhost(inp->inp_faddr))
516 wildcard++;
517 if (in_nullhost(inp->inp_laddr)) {
518 if (!in_nullhost(laddr))
519 wildcard++;
520 } else {
521 if (in_nullhost(laddr))
522 wildcard++;
523 else {
524 if (!in_hosteq(inp->inp_laddr, laddr))
525 continue;
526 }
527 }
528 if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)
529 continue;
530 if (wildcard < matchwild) {
531 match = inp;
532 matchwild = wildcard;
533 if (matchwild == 0)
534 break;
535 }
536 }
537 return (match);
538 }
539
540 #ifdef DIAGNOSTIC
541 int in_pcbnotifymiss = 0;
542 #endif
543
544 struct inpcb *
545 in_pcblookup_connect(table, faddr, fport_arg, laddr, lport_arg)
546 struct inpcbtable *table;
547 struct in_addr faddr, laddr;
548 u_int fport_arg, lport_arg;
549 {
550 struct inpcbhead *head;
551 register struct inpcb *inp;
552 u_int16_t fport = fport_arg, lport = lport_arg;
553
554 head = INPCBHASH_CONNECT(table, faddr, fport, laddr, lport);
555 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
556 if (in_hosteq(inp->inp_faddr, faddr) &&
557 inp->inp_fport == fport &&
558 inp->inp_lport == lport &&
559 in_hosteq(inp->inp_laddr, laddr))
560 goto out;
561 }
562 #ifdef DIAGNOSTIC
563 if (in_pcbnotifymiss) {
564 printf("in_pcblookup_connect: faddr=%08x fport=%d laddr=%08x lport=%d\n",
565 ntohl(faddr.s_addr), ntohs(fport),
566 ntohl(laddr.s_addr), ntohs(lport));
567 }
568 #endif
569 return (0);
570
571 out:
572 /* Move this PCB to the head of hash chain. */
573 if (inp != head->lh_first) {
574 LIST_REMOVE(inp, inp_hash);
575 LIST_INSERT_HEAD(head, inp, inp_hash);
576 }
577 return (inp);
578 }
579
580 struct inpcb *
581 in_pcblookup_bind(table, laddr, lport_arg)
582 struct inpcbtable *table;
583 struct in_addr laddr;
584 u_int lport_arg;
585 {
586 struct inpcbhead *head;
587 register struct inpcb *inp;
588 u_int16_t lport = lport_arg;
589
590 head = INPCBHASH_BIND(table, laddr, lport);
591 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
592 if (inp->inp_lport == lport &&
593 in_hosteq(inp->inp_laddr, laddr))
594 goto out;
595 }
596 head = INPCBHASH_BIND(table, zeroin_addr, lport);
597 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
598 if (inp->inp_lport == lport &&
599 in_hosteq(inp->inp_laddr, zeroin_addr))
600 goto out;
601 }
602 #ifdef DIAGNOSTIC
603 if (in_pcbnotifymiss) {
604 printf("in_pcblookup_bind: laddr=%08x lport=%d\n",
605 ntohl(laddr.s_addr), ntohs(lport));
606 }
607 #endif
608 return (0);
609
610 out:
611 /* Move this PCB to the head of hash chain. */
612 if (inp != head->lh_first) {
613 LIST_REMOVE(inp, inp_hash);
614 LIST_INSERT_HEAD(head, inp, inp_hash);
615 }
616 return (inp);
617 }
618
619 void
620 in_pcbstate(inp, state)
621 struct inpcb *inp;
622 int state;
623 {
624
625 if (inp->inp_state > INP_ATTACHED)
626 LIST_REMOVE(inp, inp_hash);
627
628 switch (state) {
629 case INP_BOUND:
630 LIST_INSERT_HEAD(INPCBHASH_BIND(inp->inp_table,
631 inp->inp_laddr, inp->inp_lport), inp, inp_hash);
632 break;
633 case INP_CONNECTED:
634 LIST_INSERT_HEAD(INPCBHASH_CONNECT(inp->inp_table,
635 inp->inp_faddr, inp->inp_fport,
636 inp->inp_laddr, inp->inp_lport), inp, inp_hash);
637 break;
638 }
639
640 inp->inp_state = state;
641 }
642