/*	$NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $	*/
2
3 /*-
4 * Copyright (c) 2005 David Young. All rights reserved.
5 *
6 * This code was written by David Young.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by David Young.
19 * 4. The name of David Young may not be used to endorse or promote
20 * products derived from this software without specific prior
21 * written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
24 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
25 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
26 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
34 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */
36
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.3 2006/11/16 01:33:45 christos Exp $");
39
40 #include "opt_inet.h"
41 #include "opt_inet_conf.h"
42
43 #include <lib/libkern/libkern.h>
44
45 #include <sys/param.h>
46 #include <sys/ioctl.h>
47 #include <sys/errno.h>
48 #include <sys/malloc.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/sysctl.h>
52 #include <sys/systm.h>
53 #include <sys/proc.h>
54 #include <sys/syslog.h>
55
56 #include <net/if.h>
57
58 #include <net/if_ether.h>
59
60 #include <netinet/in_systm.h>
61 #include <netinet/in.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/in_ifattach.h>
66 #include <netinet/in_pcb.h>
67 #include <netinet/if_inarp.h>
68 #include <netinet/ip_mroute.h>
69 #include <netinet/igmp_var.h>
70 #include <netinet/in_selsrc.h>
71
72 #ifdef INET
73 struct score_src_name {
74 const char *sn_name;
75 const in_score_src_t sn_score_src;
76 };
77
78 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
79 static int in_preference(const struct in_addr *, int, int,
80 const struct in_addr *);
81 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
82 static int in_matchlen(const struct in_addr *, int, int,
83 const struct in_addr *);
84 static int in_match_category(const struct in_addr *, int, int,
85 const struct in_addr *);
86 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
87 const size_t);
88 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
89 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
90 static in_score_src_t name_to_score_src(const char *);
91 static const char *score_src_to_name(const in_score_src_t);
92 static void in_score(const in_score_src_t *, int *, int *,
93 const struct in_addr *, int, int, const struct in_addr *);
94
95 static const struct score_src_name score_src_names[] = {
96 {"same-category", in_match_category}
97 , {"common-prefix-len", in_matchlen}
98 , {"index", in_index}
99 , {"preference", in_preference}
100 , {NULL, NULL}
101 };
102
103 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
104
105 static struct in_ifselsrc default_iss = { 0, {in_index} };
106
107 #ifdef GETIFA_DEBUG
108 int in_selsrc_debug = 0;
109
110 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
111 {
112 int rc;
113 const struct sysctlnode *rnode, *cnode;
114
115 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
116 CTLFLAG_PERMANENT, CTLTYPE_NODE, "net",
117 NULL, NULL, 0, NULL, 0, CTL_NET, CTL_EOL)) != 0) {
118 printf("%s: could not create net, rc = %d\n", __func__, rc);
119 return;
120 }
121 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
122 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
123 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
124 printf("%s: could not create net.inet, rc = %d\n", __func__,
125 rc);
126 return;
127 }
128 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
129 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
130 NULL, NULL, 0, NULL, 0,
131 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
132 printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
133 rc);
134 return;
135 }
136 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
137 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
138 NULL, NULL, 0, NULL, 0,
139 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
140 printf("%s: could not create net.inet.ip.selectsrc, "
141 "rc = %d\n", __func__, rc);
142 return;
143 }
144 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
145 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
146 SYSCTL_DESCR("enable source-selection debug messages"),
147 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
148 printf("%s: could not create net.inet.ip.selectsrc.debug, "
149 "rc = %d\n", __func__, rc);
150 return;
151 }
152 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
153 CTLFLAG_READWRITE, CTLTYPE_STRING, "default",
154 SYSCTL_DESCR("default source selection policy"),
155 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
156 CTL_CREATE, CTL_EOL)) != 0) {
157 printf(
158 "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
159 __func__, rc);
160 return;
161 }
162 }
163 #endif /* GETIFA_DEBUG */
164
/*
 * Scoring function "preference": the score is the negated preference
 * number, so an address with a lower preference number scores higher.
 * Preference numbers are assigned with ioctl SIOCSIFADDRPREF.
 *
 * `src', `idx' and `dst' are not consulted.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const int score = -preference;

	return score;
}
175
/*
 * Scoring function "index": the score is the negated position of the
 * address in the ifaddr list, so addresses nearer the head of the
 * list score higher.
 *
 * `src', `preference' and `dst' are not consulted.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	return 0 - idx;
}
186
/*
 * Scoring function "common-prefix-len": length, in bits, of the
 * longest common prefix of `src' and `dst' (0..32).  Both addresses
 * are compared byte by byte in the order they are stored.
 *
 * `preference' and `idx' are not consulted.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	int match = 0;
	const uint8_t *s = (const uint8_t *)src, *d = (const uint8_t *)dst;
	const uint8_t *lim = s + 4;
	uint_fast8_t r = 0;

	/* Skip over whole bytes that agree; r keeps the first difference. */
	while (s < lim && (r = (*d++ ^ *s++)) == 0)
		match += 8;

	/*
	 * r == 0 means that every byte matched.  Do not test s == lim
	 * here: that also holds when the mismatch is in the last byte,
	 * whose matching leading bits must still be counted below.
	 */
	if (r == 0)
		return match;

	/* Count the matching leading bits of the first differing byte. */
	while ((r & 0x80) == 0) {
		match++;
		r <<= 1;
	}
	return match;
}
213
214 static enum in_category
215 in_categorize(const struct in_addr *s)
216 {
217 if (IN_ANY_LOCAL(s->s_addr))
218 return IN_CATEGORY_LINKLOCAL;
219 else if (IN_PRIVATE(s->s_addr))
220 return IN_CATEGORY_PRIVATE;
221 else
222 return IN_CATEGORY_OTHER;
223 }
224
225 static int
226 in_match_category(const struct in_addr *src, int preference,
227 int idx, const struct in_addr *dst)
228 {
229 enum in_category dst_c = in_categorize(dst),
230 src_c = in_categorize(src);
231 #ifdef GETIFA_DEBUG
232 if (in_selsrc_debug) {
233 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
234 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
235 ntohl(src->s_addr), src_c);
236 }
237 #endif /* GETIFA_DEBUG */
238
239 if (dst_c == src_c)
240 return 2;
241 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
242 return 1;
243 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
244 return 1;
245 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
246 return 1;
247 else
248 return 0;
249 }
250
251 static void
252 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
253 const struct in_addr *src, int preference, int idx,
254 const struct in_addr *dst)
255 {
256 int i;
257
258 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
259 score[i] = (*score_src[i])(src, preference, idx, dst);
260 if (scorelenp != NULL)
261 *scorelenp = i;
262 }
263
/*
 * Compare two score vectors of `scorelen' elements lexicographically.
 *
 * Returns a positive value if score1 ranks above score2, a negative
 * value if it ranks below, and 0 if the vectors are equal.
 *
 * The result is derived from a comparison, not from the difference of
 * the two scores, because the subtraction could overflow for scores
 * of large magnitude and opposite sign.
 */
static int
in_score_cmp(int *score1, int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
276
#ifdef GETIFA_DEBUG
/*
 * Print the first `scorelen' elements of `score' as a bracketed,
 * comma-separated list followed by a newline.  The opening bracket
 * is emitted together with the first element.
 */
static void
in_score_println(int *score, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++)
		printf("%s%d", (i == 0) ? "[" : ", ", score[i]);
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
291
292 /* Scan the interface addresses on the interface ifa->ifa_ifp for
293 * the source address that best matches the destination, dst0,
294 * according to the source address-selection policy for this
295 * interface. If there is no better match than `ifa', return `ifa'.
296 * Otherwise, return the best address.
297 *
298 * Note that in_getifa is called after the kernel has decided which
299 * output interface to use (ifa->ifa_ifp), and in_getifa will not
300 * scan an address belonging to any other interface.
301 */
302 struct ifaddr *
303 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
304 {
305 const in_score_src_t *score_src;
306 int idx, scorelen;
307 const struct sockaddr_in *dst, *src;
308 struct ifaddr *alt_ifa, *best_ifa;
309 struct ifnet *ifp;
310 struct in_ifsysctl *isc;
311 struct in_ifselsrc *iss;
312 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
313
314 if (ifa->ifa_addr->sa_family != AF_INET ||
315 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */
316 ifa->ifa_seqno = NULL;
317 return ifa;
318 }
319
320 ifp = ifa->ifa_ifp;
321 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
322 if (isc != NULL && isc->isc_selsrc != NULL &&
323 isc->isc_selsrc->iss_score_src[0] != NULL)
324 iss = isc->isc_selsrc;
325 else
326 iss = &default_iss;
327 score_src = &iss->iss_score_src[0];
328
329 dst = (const struct sockaddr_in *)dst0;
330
331 best_ifa = ifa;
332
333 /* Find out the index of this ifaddr. */
334 idx = 0;
335 TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) {
336 if (alt_ifa == best_ifa)
337 break;
338 idx++;
339 }
340 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
341 best_ifa->ifa_preference, idx, &dst->sin_addr);
342
343 #ifdef GETIFA_DEBUG
344 if (in_selsrc_debug) {
345 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
346 __func__, ntohl(dst->sin_addr.s_addr),
347 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
348 in_score_println(best_score, scorelen);
349 }
350 #endif /* GETIFA_DEBUG */
351
352 idx = -1;
353 TAILQ_FOREACH(alt_ifa, &ifa->ifa_ifp->if_addrlist, ifa_list) {
354 ++idx;
355 src = IA_SIN(alt_ifa);
356
357 if (alt_ifa == ifa || src->sin_family != AF_INET)
358 continue;
359
360 in_score(score_src, score, NULL, &src->sin_addr,
361 alt_ifa->ifa_preference, idx, &dst->sin_addr);
362
363 #ifdef GETIFA_DEBUG
364 if (in_selsrc_debug) {
365 printf("%s: src %#" PRIx32 " score ", __func__,
366 ntohl(src->sin_addr.s_addr));
367 in_score_println(score, scorelen);
368 }
369 #endif /* GETIFA_DEBUG */
370
371 if (in_score_cmp(score, best_score, scorelen) > 0) {
372 (void)memcpy(best_score, score, sizeof(best_score));
373 best_ifa = alt_ifa;
374 }
375 }
376 #ifdef GETIFA_DEBUG
377 if (in_selsrc_debug) {
378 printf("%s: choose src %#" PRIx32 " score ", __func__,
379 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
380 in_score_println(best_score, scorelen);
381 }
382 #endif /* GETIFA_DEBUG */
383
384 best_ifa->ifa_seqno = &iss->iss_seqno;
385 return best_ifa;
386 }
387
388 static in_score_src_t
389 name_to_score_src(const char *name)
390 {
391 int i;
392
393 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
394 if (strcmp(score_src_names[i].sn_name, name) == 0)
395 return score_src_names[i].sn_score_src;
396 }
397 return NULL;
398 }
399
400 static const char *
401 score_src_to_name(const in_score_src_t score_src)
402 {
403 int i;
404 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
405 if (score_src == score_src_names[i].sn_score_src)
406 return score_src_names[i].sn_name;
407 }
408 return "<unknown>";
409 }
410
411 static size_t
412 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
413 const size_t buflen0)
414 {
415 int i, rc;
416 char *buf = buf0;
417 const char *delim;
418 size_t buflen = buflen0;
419
420 KASSERT(buflen >= 1);
421
422 for (delim = "", i = 0;
423 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
424 delim = ",", i++) {
425 rc = snprintf(buf, buflen, "%s%s",
426 delim, score_src_to_name(iss->iss_score_src[i]));
427 if (rc == -1)
428 return buflen0 - buflen;
429 if (rc >= buflen)
430 return buflen0 + rc - buflen;
431 buf += rc;
432 buflen -= rc;
433 }
434 if (buf == buf0)
435 *buf++ = '\0';
436 return buf - buf0;
437 }
438
439 static int
440 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
441 {
442 int i, s;
443 char *next = buf;
444 const char *name;
445 in_score_src_t score_src;
446 in_score_src_t scorers[IN_SCORE_SRC_MAX];
447
448 memset(&scorers, 0, sizeof(scorers));
449 for (i = 0;
450 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
451 i++) {
452 if (strcmp(name, "") == 0)
453 break;
454 if ((score_src = name_to_score_src(name)) == NULL)
455 return EINVAL;
456 scorers[i] = score_src;
457 }
458 if (i == IN_SCORE_SRC_MAX && name != NULL)
459 return EFBIG;
460 s = splnet();
461 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
462 /* If iss affects a specific interface that used to use
463 * the default policy, increase the sequence number on the
464 * default policy, forcing routes that cache a source
465 * (rt_ifa) found by the default policy to refresh their
466 * cache.
467 */
468 if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
469 scorers[0] != NULL)
470 default_iss.iss_seqno++;
471 iss->iss_seqno++;
472 splx(s);
473 return 0;
474 }
475
476 /*
477 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
478 * Pulls the old value out as a human-readable string, interprets
479 * and records the new value.
480 */
481 static int
482 in_sysctl_selectsrc(SYSCTLFN_ARGS)
483 {
484 char policy[IN_SELECTSRC_LEN];
485 int error;
486 struct sysctlnode node;
487 struct in_ifselsrc *iss;
488
489 node = *rnode;
490 iss = (struct in_ifselsrc *)node.sysctl_data;
491 if (oldp != NULL &&
492 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
493 return error;
494 node.sysctl_data = &policy[0];
495 error = sysctl_lookup(SYSCTLFN_CALL(&node));
496 if (error || newp == NULL)
497 return (error);
498
499 return in_set_selectsrc(iss, policy);
500 }
501
502 static const struct sysctlnode *
503 in_domifattach_sysctl(struct in_ifsysctl *isc)
504 {
505 int rc;
506 const struct sysctlnode *rnode;
507
508 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
509 CTLFLAG_READWRITE, CTLTYPE_NODE,
510 "interfaces", NULL,
511 NULL, 0, NULL, 0,
512 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
513 CTL_EOL)) != 0) {
514 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
515 __func__, rc);
516 return NULL;
517 }
518 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
519 CTLFLAG_READWRITE, CTLTYPE_NODE,
520 isc->isc_ifp->if_xname,
521 SYSCTL_DESCR("interface ip options"),
522 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
523 printf("%s: could not create net.inet.ip.interfaces.%s, "
524 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
525 goto err;
526 }
527 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
528 CTLFLAG_READWRITE, CTLTYPE_STRING,
529 "selectsrc",
530 SYSCTL_DESCR("source selection policy"),
531 in_sysctl_selectsrc, 0,
532 isc->isc_selsrc, IN_SELECTSRC_LEN,
533 CTL_CREATE, CTL_EOL)) != 0) {
534 printf(
535 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
536 __func__, isc->isc_ifp->if_xname, rc);
537 goto err;
538 }
539 return rnode;
540 err:
541 sysctl_teardown(&isc->isc_log);
542 return NULL;
543 }
544
545 void *
546 in_domifattach(struct ifnet *ifp)
547 {
548 struct in_ifsysctl *isc;
549 struct in_ifselsrc *iss;
550
551 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
552 M_WAITOK | M_ZERO);
553
554 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
555 M_WAITOK | M_ZERO);
556
557 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
558 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
559
560 isc->isc_ifp = ifp;
561 isc->isc_selsrc = iss;
562
563 if (in_domifattach_sysctl(isc) == NULL)
564 goto err;
565
566 return isc;
567 err:
568 free(iss, M_IFADDR);
569 free(isc, M_IFADDR);
570 return NULL;
571 }
572
573 void
574 in_domifdetach(struct ifnet *ifp, void *aux)
575 {
576 struct in_ifsysctl *isc;
577 struct in_ifselsrc *iss;
578
579 if (aux == NULL)
580 return;
581 isc = (struct in_ifsysctl *)aux;
582 iss = isc->isc_selsrc;
583 sysctl_teardown(&isc->isc_log);
584 free(isc, M_IFADDR);
585 free(iss, M_IFADDR);
586 }
587 #endif /* INET */
588