in_pcb.c revision 1.27.4.1 1 /* $NetBSD: in_pcb.c,v 1.27.4.1 1996/12/10 11:39:02 mycroft Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed by the University of
18 * California, Berkeley and its contributors.
19 * 4. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 *
35 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/protosw.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/ioctl.h>
46 #include <sys/errno.h>
47 #include <sys/time.h>
48 #include <sys/proc.h>
49
50 #include <net/if.h>
51 #include <net/route.h>
52
53 #include <netinet/in.h>
54 #include <netinet/in_systm.h>
55 #include <netinet/ip.h>
56 #include <netinet/in_pcb.h>
57 #include <netinet/in_var.h>
58 #include <netinet/ip_var.h>
59
/*
 * All-zeroes Internet address.  It is never assigned in this file and is
 * passed to in_pcblookup() where "no foreign address" is meant.
 */
struct in_addr zeroin_addr;

/*
 * Yield a pointer to the hash chain head in `table' for a PCB with the
 * given addresses and ports (all in network byte order).
 *
 * The bucket index is computed from the foreign address and the two port
 * numbers, masked with table->inpt_hash.  `laddr' is accepted for
 * symmetry with the callers but does not take part in the hash.
 *
 * Every argument, and the expansion as a whole, is parenthesized so the
 * macro can be handed arbitrary expressions (e.g. `&tbl') safely.
 */
#define	INPCBHASH(table, faddr, fport, laddr, lport) \
	(&(table)->inpt_hashtbl[(ntohl((faddr)->s_addr) + ntohs((fport)) + \
	    ntohs((lport))) & ((table)->inpt_hash)])
64
65 void
66 in_pcbinit(table, hashsize)
67 struct inpcbtable *table;
68 int hashsize;
69 {
70
71 CIRCLEQ_INIT(&table->inpt_queue);
72 table->inpt_hashtbl = hashinit(hashsize, M_PCB, &table->inpt_hash);
73 table->inpt_lastport = IPPORT_RESERVED;
74 }
75
76 int
77 in_pcballoc(so, v)
78 struct socket *so;
79 void *v;
80 {
81 struct inpcbtable *table = v;
82 register struct inpcb *inp;
83 int s;
84
85 MALLOC(inp, struct inpcb *, sizeof(*inp), M_PCB, M_WAITOK);
86 if (inp == NULL)
87 return (ENOBUFS);
88 bzero((caddr_t)inp, sizeof(*inp));
89 inp->inp_table = table;
90 inp->inp_socket = so;
91 s = splnet();
92 CIRCLEQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
93 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
94 &inp->inp_laddr, inp->inp_lport), inp, inp_hash);
95 splx(s);
96 so->so_pcb = inp;
97 return (0);
98 }
99
100 int
101 in_pcbbind(v, nam)
102 register void *v;
103 struct mbuf *nam;
104 {
105 register struct inpcb *inp = v;
106 register struct socket *so = inp->inp_socket;
107 register struct inpcbtable *table = inp->inp_table;
108 register struct sockaddr_in *sin;
109 struct proc *p = curproc; /* XXX */
110 u_int16_t lport = 0;
111 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
112 int error;
113
114 if (in_ifaddr.tqh_first == 0)
115 return (EADDRNOTAVAIL);
116 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY)
117 return (EINVAL);
118 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0 &&
119 ((so->so_proto->pr_flags & PR_CONNREQUIRED) == 0 ||
120 (so->so_options & SO_ACCEPTCONN) == 0))
121 wild = INPLOOKUP_WILDCARD;
122 if (nam) {
123 sin = mtod(nam, struct sockaddr_in *);
124 if (nam->m_len != sizeof (*sin))
125 return (EINVAL);
126 #ifdef notdef
127 /*
128 * We should check the family, but old programs
129 * incorrectly fail to initialize it.
130 */
131 if (sin->sin_family != AF_INET)
132 return (EAFNOSUPPORT);
133 #endif
134 lport = sin->sin_port;
135 if (IN_MULTICAST(sin->sin_addr.s_addr)) {
136 /*
137 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
138 * allow complete duplication of binding if
139 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
140 * and a multicast address is bound on both
141 * new and duplicated sockets.
142 */
143 if (so->so_options & SO_REUSEADDR)
144 reuseport = SO_REUSEADDR|SO_REUSEPORT;
145 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
146 sin->sin_port = 0; /* yech... */
147 if (ifa_ifwithaddr(sintosa(sin)) == 0)
148 return (EADDRNOTAVAIL);
149 }
150 if (lport) {
151 struct inpcb *t;
152
153 /* GROSS */
154 if (ntohs(lport) < IPPORT_RESERVED &&
155 (error = suser(p->p_ucred, &p->p_acflag)))
156 return (EACCES);
157 t = in_pcblookup(table, zeroin_addr, 0,
158 sin->sin_addr, lport, wild);
159 if (t && (reuseport & t->inp_socket->so_options) == 0)
160 return (EADDRINUSE);
161 }
162 inp->inp_laddr = sin->sin_addr;
163 }
164 if (lport == 0) {
165 for (lport = table->inpt_lastport + 1;
166 lport < IPPORT_USERRESERVED; lport++)
167 if (!in_pcblookup(table, zeroin_addr, 0, inp->inp_laddr,
168 htons(lport), wild))
169 goto found;
170 for (lport = IPPORT_RESERVED;
171 lport <= table->inpt_lastport; lport++)
172 if (!in_pcblookup(table, zeroin_addr, 0, inp->inp_laddr,
173 htons(lport), wild))
174 goto found;
175 return (EAGAIN);
176 found:
177 table->inpt_lastport = lport;
178 lport = htons(lport);
179 }
180 inp->inp_lport = lport;
181 in_pcbrehash(inp);
182 return (0);
183 }
184
185 /*
186 * Connect from a socket to a specified address.
187 * Both address and port must be specified in argument sin.
188 * If don't have a local address for this socket yet,
189 * then pick one.
190 */
191 int
192 in_pcbconnect(v, nam)
193 register void *v;
194 struct mbuf *nam;
195 {
196 register struct inpcb *inp = v;
197 struct in_ifaddr *ia;
198 struct sockaddr_in *ifaddr = NULL;
199 register struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
200
201 if (nam->m_len != sizeof (*sin))
202 return (EINVAL);
203 if (sin->sin_family != AF_INET)
204 return (EAFNOSUPPORT);
205 if (sin->sin_port == 0)
206 return (EADDRNOTAVAIL);
207 if (in_ifaddr.tqh_first != 0) {
208 /*
209 * If the destination address is INADDR_ANY,
210 * use the primary local address.
211 * If the supplied address is INADDR_BROADCAST,
212 * and the primary interface supports broadcast,
213 * choose the broadcast address for that interface.
214 */
215 if (sin->sin_addr.s_addr == INADDR_ANY)
216 sin->sin_addr = in_ifaddr.tqh_first->ia_addr.sin_addr;
217 else if (sin->sin_addr.s_addr == INADDR_BROADCAST &&
218 (in_ifaddr.tqh_first->ia_ifp->if_flags & IFF_BROADCAST))
219 sin->sin_addr = in_ifaddr.tqh_first->ia_broadaddr.sin_addr;
220 }
221 if (inp->inp_laddr.s_addr == INADDR_ANY) {
222 register struct route *ro;
223
224 ia = (struct in_ifaddr *)0;
225 /*
226 * If route is known or can be allocated now,
227 * our src addr is taken from the i/f, else punt.
228 */
229 ro = &inp->inp_route;
230 if (ro->ro_rt &&
231 (satosin(&ro->ro_dst)->sin_addr.s_addr !=
232 sin->sin_addr.s_addr ||
233 inp->inp_socket->so_options & SO_DONTROUTE)) {
234 RTFREE(ro->ro_rt);
235 ro->ro_rt = (struct rtentry *)0;
236 }
237 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
238 (ro->ro_rt == (struct rtentry *)0 ||
239 ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
240 /* No route yet, so try to acquire one */
241 ro->ro_dst.sa_family = AF_INET;
242 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
243 satosin(&ro->ro_dst)->sin_addr = sin->sin_addr;
244 rtalloc(ro);
245 }
246 /*
247 * If we found a route, use the address
248 * corresponding to the outgoing interface
249 * unless it is the loopback (in case a route
250 * to our address on another net goes to loopback).
251 */
252 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
253 ia = ifatoia(ro->ro_rt->rt_ifa);
254 if (ia == 0) {
255 u_int16_t fport = sin->sin_port;
256
257 sin->sin_port = 0;
258 ia = ifatoia(ifa_ifwithladdr(sintosa(sin)));
259 sin->sin_port = fport;
260 if (ia == 0)
261 ia = in_ifaddr.tqh_first;
262 if (ia == 0)
263 return (EADDRNOTAVAIL);
264 }
265 /*
266 * If the destination address is multicast and an outgoing
267 * interface has been set as a multicast option, use the
268 * address of that interface as our source address.
269 */
270 if (IN_MULTICAST(sin->sin_addr.s_addr) &&
271 inp->inp_moptions != NULL) {
272 struct ip_moptions *imo;
273 struct ifnet *ifp;
274
275 imo = inp->inp_moptions;
276 if (imo->imo_multicast_ifp != NULL) {
277 ifp = imo->imo_multicast_ifp;
278 for (ia = in_ifaddr.tqh_first; ia != 0;
279 ia = ia->ia_list.tqe_next)
280 if (ia->ia_ifp == ifp)
281 break;
282 if (ia == 0)
283 return (EADDRNOTAVAIL);
284 }
285 }
286 ifaddr = satosin(&ia->ia_addr);
287 }
288 if (in_pcbhashlookup(inp->inp_table, sin->sin_addr, sin->sin_port,
289 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr,
290 inp->inp_lport) != 0)
291 return (EADDRINUSE);
292 if (inp->inp_laddr.s_addr == INADDR_ANY) {
293 if (inp->inp_lport == 0)
294 (void)in_pcbbind(inp, (struct mbuf *)0);
295 inp->inp_laddr = ifaddr->sin_addr;
296 }
297 inp->inp_faddr = sin->sin_addr;
298 inp->inp_fport = sin->sin_port;
299 in_pcbrehash(inp);
300 return (0);
301 }
302
303 void
304 in_pcbdisconnect(v)
305 void *v;
306 {
307 struct inpcb *inp = v;
308
309 inp->inp_faddr.s_addr = INADDR_ANY;
310 inp->inp_fport = 0;
311 in_pcbrehash(inp);
312 if (inp->inp_socket->so_state & SS_NOFDREF)
313 in_pcbdetach(inp);
314 }
315
316 void
317 in_pcbdetach(v)
318 void *v;
319 {
320 struct inpcb *inp = v;
321 struct socket *so = inp->inp_socket;
322 int s;
323
324 so->so_pcb = 0;
325 sofree(so);
326 if (inp->inp_options)
327 (void)m_free(inp->inp_options);
328 if (inp->inp_route.ro_rt)
329 rtfree(inp->inp_route.ro_rt);
330 ip_freemoptions(inp->inp_moptions);
331 s = splnet();
332 LIST_REMOVE(inp, inp_hash);
333 CIRCLEQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
334 splx(s);
335 FREE(inp, M_PCB);
336 }
337
338 void
339 in_setsockaddr(inp, nam)
340 register struct inpcb *inp;
341 struct mbuf *nam;
342 {
343 register struct sockaddr_in *sin;
344
345 nam->m_len = sizeof (*sin);
346 sin = mtod(nam, struct sockaddr_in *);
347 bzero((caddr_t)sin, sizeof (*sin));
348 sin->sin_family = AF_INET;
349 sin->sin_len = sizeof(*sin);
350 sin->sin_port = inp->inp_lport;
351 sin->sin_addr = inp->inp_laddr;
352 }
353
354 void
355 in_setpeeraddr(inp, nam)
356 struct inpcb *inp;
357 struct mbuf *nam;
358 {
359 register struct sockaddr_in *sin;
360
361 nam->m_len = sizeof (*sin);
362 sin = mtod(nam, struct sockaddr_in *);
363 bzero((caddr_t)sin, sizeof (*sin));
364 sin->sin_family = AF_INET;
365 sin->sin_len = sizeof(*sin);
366 sin->sin_port = inp->inp_fport;
367 sin->sin_addr = inp->inp_faddr;
368 }
369
370 /*
371 * Pass some notification to all connections of a protocol
372 * associated with address dst. The local address and/or port numbers
373 * may be specified to limit the search. The "usual action" will be
374 * taken, depending on the ctlinput cmd. The caller must filter any
375 * cmds that are uninteresting (e.g., no error in the map).
376 * Call the protocol specific routine (if any) to report
377 * any errors for each matching socket.
378 *
379 * Must be called at splsoftnet.
380 */
381 void
382 in_pcbnotify(table, dst, fport_arg, laddr, lport_arg, errno, notify)
383 struct inpcbtable *table;
384 struct sockaddr *dst;
385 u_int fport_arg, lport_arg;
386 struct in_addr laddr;
387 int errno;
388 void (*notify) __P((struct inpcb *, int));
389 {
390 register struct inpcb *inp, *oinp;
391 struct in_addr faddr;
392 u_int16_t fport = fport_arg, lport = lport_arg;
393
394 if (dst->sa_family != AF_INET)
395 return;
396 faddr = satosin(dst)->sin_addr;
397 if (faddr.s_addr == INADDR_ANY)
398 return;
399
400 for (inp = table->inpt_queue.cqh_first;
401 inp != (struct inpcb *)&table->inpt_queue;) {
402 if (inp->inp_faddr.s_addr != faddr.s_addr ||
403 inp->inp_socket == 0 ||
404 inp->inp_fport != fport ||
405 inp->inp_lport != lport ||
406 inp->inp_laddr.s_addr != laddr.s_addr) {
407 inp = inp->inp_queue.cqe_next;
408 continue;
409 }
410 oinp = inp;
411 inp = inp->inp_queue.cqe_next;
412 if (notify)
413 (*notify)(oinp, errno);
414 }
415 }
416
417 void
418 in_pcbnotifyall(table, dst, errno, notify)
419 struct inpcbtable *table;
420 struct sockaddr *dst;
421 int errno;
422 void (*notify) __P((struct inpcb *, int));
423 {
424 register struct inpcb *inp, *oinp;
425 struct in_addr faddr;
426
427 if (dst->sa_family != AF_INET)
428 return;
429 faddr = satosin(dst)->sin_addr;
430 if (faddr.s_addr == INADDR_ANY)
431 return;
432
433 for (inp = table->inpt_queue.cqh_first;
434 inp != (struct inpcb *)&table->inpt_queue;) {
435 if (inp->inp_faddr.s_addr != faddr.s_addr ||
436 inp->inp_socket == 0) {
437 inp = inp->inp_queue.cqe_next;
438 continue;
439 }
440 oinp = inp;
441 inp = inp->inp_queue.cqe_next;
442 if (notify)
443 (*notify)(oinp, errno);
444 }
445 }
446
447 /*
448 * Check for alternatives when higher level complains
449 * about service problems. For now, invalidate cached
450 * routing information. If the route was created dynamically
451 * (by a redirect), time to try a default gateway again.
452 */
453 void
454 in_losing(inp)
455 struct inpcb *inp;
456 {
457 register struct rtentry *rt;
458 struct rt_addrinfo info;
459
460 if ((rt = inp->inp_route.ro_rt)) {
461 inp->inp_route.ro_rt = 0;
462 bzero((caddr_t)&info, sizeof(info));
463 info.rti_info[RTAX_DST] = &inp->inp_route.ro_dst;
464 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
465 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
466 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
467 if (rt->rt_flags & RTF_DYNAMIC)
468 (void) rtrequest(RTM_DELETE, rt_key(rt),
469 rt->rt_gateway, rt_mask(rt), rt->rt_flags,
470 (struct rtentry **)0);
471 else
472 /*
473 * A new route can be allocated
474 * the next time output is attempted.
475 */
476 rtfree(rt);
477 }
478 }
479
480 /*
481 * After a routing change, flush old routing
482 * and allocate a (hopefully) better one.
483 */
484 void
485 in_rtchange(inp, errno)
486 register struct inpcb *inp;
487 int errno;
488 {
489 if (inp->inp_route.ro_rt) {
490 rtfree(inp->inp_route.ro_rt);
491 inp->inp_route.ro_rt = 0;
492 /*
493 * A new route can be allocated the next time
494 * output is attempted.
495 */
496 }
497 }
498
499 struct inpcb *
500 in_pcblookup(table, faddr, fport_arg, laddr, lport_arg, flags)
501 struct inpcbtable *table;
502 struct in_addr faddr, laddr;
503 u_int fport_arg, lport_arg;
504 int flags;
505 {
506 register struct inpcb *inp, *match = 0;
507 int matchwild = 3, wildcard;
508 u_int16_t fport = fport_arg, lport = lport_arg;
509
510 for (inp = table->inpt_queue.cqh_first;
511 inp != (struct inpcb *)&table->inpt_queue;
512 inp = inp->inp_queue.cqe_next) {
513 if (inp->inp_lport != lport)
514 continue;
515 wildcard = 0;
516 if (inp->inp_faddr.s_addr != INADDR_ANY) {
517 if (faddr.s_addr == INADDR_ANY)
518 wildcard++;
519 else if (inp->inp_faddr.s_addr != faddr.s_addr ||
520 inp->inp_fport != fport)
521 continue;
522 } else {
523 if (faddr.s_addr != INADDR_ANY)
524 wildcard++;
525 }
526 if (inp->inp_laddr.s_addr != INADDR_ANY) {
527 if (laddr.s_addr == INADDR_ANY)
528 wildcard++;
529 else if (inp->inp_laddr.s_addr != laddr.s_addr)
530 continue;
531 } else {
532 if (laddr.s_addr != INADDR_ANY)
533 wildcard++;
534 }
535 if (wildcard && (flags & INPLOOKUP_WILDCARD) == 0)
536 continue;
537 if (wildcard < matchwild) {
538 match = inp;
539 matchwild = wildcard;
540 if (matchwild == 0)
541 break;
542 }
543 }
544 return (match);
545 }
546
547 void
548 in_pcbrehash(inp)
549 struct inpcb *inp;
550 {
551 struct inpcbtable *table = inp->inp_table;
552 int s;
553
554 s = splnet();
555 LIST_REMOVE(inp, inp_hash);
556 LIST_INSERT_HEAD(INPCBHASH(table, &inp->inp_faddr, inp->inp_fport,
557 &inp->inp_laddr, inp->inp_lport), inp, inp_hash);
558 splx(s);
559 }
560
561 #ifdef DIAGNOSTIC
562 int in_pcbnotifymiss = 0;
563 #endif
564
565 struct inpcb *
566 in_pcbhashlookup(table, faddr, fport_arg, laddr, lport_arg)
567 struct inpcbtable *table;
568 struct in_addr faddr, laddr;
569 u_int fport_arg, lport_arg;
570 {
571 struct inpcbhead *head;
572 register struct inpcb *inp;
573 u_int16_t fport = fport_arg, lport = lport_arg;
574
575 head = INPCBHASH(table, &faddr, fport, &laddr, lport);
576 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) {
577 if (inp->inp_faddr.s_addr == faddr.s_addr &&
578 inp->inp_fport == fport &&
579 inp->inp_lport == lport &&
580 inp->inp_laddr.s_addr == laddr.s_addr) {
581 /*
582 * Move this PCB to the head of hash chain so that
583 * repeated accesses are quicker. This is analogous to
584 * the historic single-entry PCB cache.
585 */
586 if (inp != head->lh_first) {
587 LIST_REMOVE(inp, inp_hash);
588 LIST_INSERT_HEAD(head, inp, inp_hash);
589 }
590 break;
591 }
592 }
593 #ifdef DIAGNOSTIC
594 if (inp == NULL && in_pcbnotifymiss) {
595 printf("in_pcbhashlookup: faddr=%08x fport=%d laddr=%08x lport=%d\n",
596 ntohl(faddr.s_addr), ntohs(fport),
597 ntohl(laddr.s_addr), ntohs(lport));
598 }
599 #endif
600 return (inp);
601 }
602