in_selsrc.c revision 1.13 1 /* $NetBSD: in_selsrc.c,v 1.13 2015/06/08 07:59:54 roy Exp $ */
2
3 /*-
4 * Copyright (c) 2005 David Young. All rights reserved.
5 *
6 * This code was written by David Young.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY DAVID YOUNG ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
19 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
20 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
21 * FOUNDATION OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __KERNEL_RCSID(0, "$NetBSD: in_selsrc.c,v 1.13 2015/06/08 07:59:54 roy Exp $");
33
34 #include "opt_inet.h"
35 #include "opt_inet_conf.h"
36
37 #include <lib/libkern/libkern.h>
38
39 #include <sys/param.h>
40 #include <sys/ioctl.h>
41 #include <sys/errno.h>
42 #include <sys/malloc.h>
43 #include <sys/socket.h>
44 #include <sys/socketvar.h>
45 #include <sys/sysctl.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/syslog.h>
49
50 #include <net/if.h>
51
52 #include <net/if_ether.h>
53
54 #include <netinet/in_systm.h>
55 #include <netinet/in.h>
56 #include <netinet/in_var.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip_var.h>
59 #include <netinet/in_ifattach.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/if_inarp.h>
62 #include <netinet/ip_mroute.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/in_selsrc.h>
65
66 #ifdef INET
67 struct score_src_name {
68 const char *sn_name;
69 const in_score_src_t sn_score_src;
70 };
71
72 static const struct sysctlnode *in_domifattach_sysctl(struct in_ifsysctl *);
73 static int in_preference(const struct in_addr *, int, int,
74 const struct in_addr *);
75 static int in_index(const struct in_addr *, int, int, const struct in_addr *);
76 static int in_matchlen(const struct in_addr *, int, int,
77 const struct in_addr *);
78 static int in_match_category(const struct in_addr *, int, int,
79 const struct in_addr *);
80 static size_t in_get_selectsrc(const struct in_ifselsrc *, char *,
81 const size_t);
82 static int in_set_selectsrc(struct in_ifselsrc *, char *buf);
83 static int in_sysctl_selectsrc(SYSCTLFN_PROTO);
84 static in_score_src_t name_to_score_src(const char *);
85 static const char *score_src_to_name(const in_score_src_t);
86 static void in_score(const in_score_src_t *, int *, int *,
87 const struct in_addr *, int, int, const struct in_addr *);
88
89 static const struct score_src_name score_src_names[] = {
90 {"same-category", in_match_category}
91 , {"common-prefix-len", in_matchlen}
92 , {"index", in_index}
93 , {"preference", in_preference}
94 , {NULL, NULL}
95 };
96
97 static const struct in_ifselsrc initial_iss = { 0, {NULL} };
98
99 static struct in_ifselsrc default_iss = { 0, {in_index} };
100
101 #ifdef GETIFA_DEBUG
102 int in_selsrc_debug = 0;
103 #endif /* GETIFA_DEBUG */
104
105 SYSCTL_SETUP(sysctl_selectsrc_setup, "sysctl selectsrc subtree setup")
106 {
107 int rc;
108 const struct sysctlnode *rnode, *cnode;
109
110 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
111 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inet",
112 NULL, NULL, 0, NULL, 0, CTL_NET, PF_INET, CTL_EOL)) != 0) {
113 printf("%s: could not create net.inet, rc = %d\n", __func__,
114 rc);
115 return;
116 }
117 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
118 CTLFLAG_PERMANENT, CTLTYPE_NODE, "ip",
119 NULL, NULL, 0, NULL, 0,
120 CTL_NET, PF_INET, IPPROTO_IP, CTL_EOL)) != 0) {
121 printf("%s: could not create net.inet.ip, rc = %d\n", __func__,
122 rc);
123 return;
124 }
125 if ((rc = sysctl_createv(clog, 0, NULL, &rnode,
126 CTLFLAG_PERMANENT, CTLTYPE_NODE, "selectsrc",
127 NULL, NULL, 0, NULL, 0,
128 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE, CTL_EOL)) != 0) {
129 printf("%s: could not create net.inet.ip.selectsrc, "
130 "rc = %d\n", __func__, rc);
131 return;
132 }
133 #ifdef GETIFA_DEBUG
134 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
135 CTLFLAG_PERMANENT|CTLFLAG_READWRITE, CTLTYPE_INT, "debug",
136 SYSCTL_DESCR("enable source-selection debug messages"),
137 NULL, 0, &in_selsrc_debug, 0, CTL_CREATE, CTL_EOL)) != 0) {
138 printf("%s: could not create net.inet.ip.selectsrc.debug, "
139 "rc = %d\n", __func__, rc);
140 return;
141 }
142 #endif /* GETIFA_DEBUG */
143 if ((rc = sysctl_createv(clog, 0, &rnode, &cnode,
144 CTLFLAG_READWRITE, CTLTYPE_STRUCT, "default",
145 SYSCTL_DESCR("default source selection policy"),
146 in_sysctl_selectsrc, 0, &default_iss, IN_SELECTSRC_LEN,
147 CTL_CREATE, CTL_EOL)) != 0) {
148 printf(
149 "%s: could not create net.inet.ip.selectsrc.default (%d)\n",
150 __func__, rc);
151 return;
152 }
153 }
154
/*
 * Scoring function "preference": the score is the address's preference
 * number itself, so addresses with a higher preference number win.
 * Preference numbers are assigned with ioctl SIOCSIFADDRPREF.
 *
 * src, idx and dst are part of the common scoring signature but are
 * not consulted.
 */
static int
in_preference(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	(void)src;
	(void)idx;
	(void)dst;

	return preference;
}
165
/*
 * Scoring function "index": the score is the negated position of the
 * address in the ifaddr list, so addresses nearer the head of the list
 * score higher.
 *
 * src, preference and dst are part of the common scoring signature but
 * are not consulted.
 */
static int
in_index(const struct in_addr *src, int preference, int idx,
    const struct in_addr *dst)
{
	(void)src;
	(void)preference;
	(void)dst;

	return -idx;
}
176
/*
 * Scoring function "common-prefix-len": length, in bits (0..32), of the
 * longest common prefix of src and dst.  The addresses are compared
 * byte by byte in memory order, most significant bit of each byte
 * first.
 *
 * preference and idx are part of the common scoring signature but are
 * not consulted.
 *
 * (Derived from in6_matchlen.)
 */
static int
in_matchlen(const struct in_addr *src, int preference,
    int idx, const struct in_addr *dst)
{
	const uint8_t *s = (const uint8_t *)src;
	const uint8_t *d = (const uint8_t *)dst;
	const uint8_t *lim = s + sizeof(*src);
	uint_fast8_t r = 0;
	int match = 0;

	(void)preference;
	(void)idx;

	/* Count whole matching bytes; r keeps the XOR of the first mismatch. */
	while (s < lim && (r = (*d++ ^ *s++)) == 0)
		match += 8;

	/*
	 * r == 0 means every byte matched.  Testing s == lim instead
	 * would also be true when the mismatch is in the last byte, and
	 * the matching leading bits of that byte would be lost.
	 */
	if (r == 0)
		return match;

	/* Count the matching leading bits of the first differing byte. */
	while ((r & 0x80) == 0) {
		match++;
		r <<= 1;
	}
	return match;
}
203
204 static enum in_category
205 in_categorize(const struct in_addr *s)
206 {
207 if (IN_ANY_LOCAL(s->s_addr))
208 return IN_CATEGORY_LINKLOCAL;
209 else if (IN_PRIVATE(s->s_addr))
210 return IN_CATEGORY_PRIVATE;
211 else
212 return IN_CATEGORY_OTHER;
213 }
214
215 static int
216 in_match_category(const struct in_addr *src, int preference,
217 int idx, const struct in_addr *dst)
218 {
219 enum in_category dst_c = in_categorize(dst),
220 src_c = in_categorize(src);
221 #ifdef GETIFA_DEBUG
222 if (in_selsrc_debug) {
223 printf("%s: dst %#08" PRIx32 " categ %d, src %#08" PRIx32
224 " categ %d\n", __func__, ntohl(dst->s_addr), dst_c,
225 ntohl(src->s_addr), src_c);
226 }
227 #endif /* GETIFA_DEBUG */
228
229 if (dst_c == src_c)
230 return 2;
231 else if (dst_c == IN_CATEGORY_LINKLOCAL && src_c == IN_CATEGORY_PRIVATE)
232 return 1;
233 else if (dst_c == IN_CATEGORY_PRIVATE && src_c == IN_CATEGORY_LINKLOCAL)
234 return 1;
235 else if (dst_c == IN_CATEGORY_OTHER && src_c == IN_CATEGORY_PRIVATE)
236 return 1;
237 else
238 return 0;
239 }
240
241 static void
242 in_score(const in_score_src_t *score_src, int *score, int *scorelenp,
243 const struct in_addr *src, int preference, int idx,
244 const struct in_addr *dst)
245 {
246 int i;
247
248 for (i = 0; i < IN_SCORE_SRC_MAX && score_src[i] != NULL; i++)
249 score[i] = (*score_src[i])(src, preference, idx, dst);
250 if (scorelenp != NULL)
251 *scorelenp = i;
252 }
253
/*
 * Compare two score vectors of scorelen elements lexicographically.
 *
 * Returns a negative value if score1 sorts before score2, a positive
 * value if it sorts after, and 0 if the first scorelen elements are
 * equal (including when scorelen is 0).
 *
 * The result is derived from comparisons rather than from the
 * difference of the two elements: subtracting two arbitrary ints can
 * overflow, which is undefined and can flip the sign of the result.
 */
static int
in_score_cmp(const int *score1, const int *score2, int scorelen)
{
	int i;

	for (i = 0; i < scorelen; i++) {
		if (score1[i] != score2[i])
			return (score1[i] > score2[i]) ? 1 : -1;
	}
	return 0;
}
266
#ifdef GETIFA_DEBUG
/*
 * Debug aid: print the first scorelen elements of a score vector as a
 * comma-separated list closed by "]" and a newline.  The opening "["
 * is printed only together with the first element.
 */
static void
in_score_println(int *score, int scorelen)
{
	const char *sep = "[";
	int k = 0;

	while (k < scorelen) {
		printf("%s%d", sep, score[k]);
		sep = ", ";
		k++;
	}
	printf("]\n");
}
#endif /* GETIFA_DEBUG */
281
282 /* Scan the interface addresses on the interface ifa->ifa_ifp for
283 * the source address that best matches the destination, dst0,
284 * according to the source address-selection policy for this
285 * interface. If there is no better match than `ifa', return `ifa'.
286 * Otherwise, return the best address.
287 *
288 * Note that in_getifa is called after the kernel has decided which
289 * output interface to use (ifa->ifa_ifp), and in_getifa will not
290 * scan an address belonging to any other interface.
291 */
292 struct ifaddr *
293 in_getifa(struct ifaddr *ifa, const struct sockaddr *dst0)
294 {
295 const in_score_src_t *score_src;
296 int idx, scorelen;
297 const struct sockaddr_in *dst, *src;
298 struct ifaddr *alt_ifa, *best_ifa;
299 struct ifnet *ifp;
300 struct in_ifsysctl *isc;
301 struct in_ifselsrc *iss;
302 int best_score[IN_SCORE_SRC_MAX], score[IN_SCORE_SRC_MAX];
303 struct in_ifaddr *ia;
304
305 if (ifa->ifa_addr->sa_family != AF_INET ||
306 dst0 == NULL || dst0->sa_family != AF_INET) { /* Possible. */
307 ifa->ifa_seqno = NULL;
308 return ifa;
309 }
310
311 ifp = ifa->ifa_ifp;
312 isc = (struct in_ifsysctl *)ifp->if_afdata[AF_INET];
313 if (isc != NULL && isc->isc_selsrc != NULL &&
314 isc->isc_selsrc->iss_score_src[0] != NULL)
315 iss = isc->isc_selsrc;
316 else
317 iss = &default_iss;
318 score_src = &iss->iss_score_src[0];
319
320 dst = (const struct sockaddr_in *)dst0;
321
322 best_ifa = ifa;
323
324 /* Find out the index of this ifaddr. */
325 idx = 0;
326 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
327 if (alt_ifa == best_ifa)
328 break;
329 idx++;
330 }
331 in_score(score_src, best_score, &scorelen, &IA_SIN(best_ifa)->sin_addr,
332 best_ifa->ifa_preference, idx, &dst->sin_addr);
333
334 #ifdef GETIFA_DEBUG
335 if (in_selsrc_debug) {
336 printf("%s: enter dst %#" PRIx32 " src %#" PRIx32 " score ",
337 __func__, ntohl(dst->sin_addr.s_addr),
338 ntohl(satosin(best_ifa->ifa_addr)->sin_addr.s_addr));
339 in_score_println(best_score, scorelen);
340 }
341 #endif /* GETIFA_DEBUG */
342
343 idx = -1;
344 IFADDR_FOREACH(alt_ifa, ifa->ifa_ifp) {
345 ++idx;
346 src = IA_SIN(alt_ifa);
347
348 if (alt_ifa == ifa || src->sin_family != AF_INET)
349 continue;
350 ia = (struct in_ifaddr *)alt_ifa;
351 if (ia->ia4_flags & IN_IFF_NOTREADY)
352 continue;
353
354 in_score(score_src, score, NULL, &src->sin_addr,
355 alt_ifa->ifa_preference, idx, &dst->sin_addr);
356
357 #ifdef GETIFA_DEBUG
358 if (in_selsrc_debug) {
359 printf("%s: src %#" PRIx32 " score ", __func__,
360 ntohl(src->sin_addr.s_addr));
361 in_score_println(score, scorelen);
362 }
363 #endif /* GETIFA_DEBUG */
364
365 if (in_score_cmp(score, best_score, scorelen) > 0) {
366 (void)memcpy(best_score, score, sizeof(best_score));
367 best_ifa = alt_ifa;
368 }
369 }
370
371 ia = (struct in_ifaddr *)best_ifa;
372 if (ia->ia4_flags & IN_IFF_NOTREADY)
373 return NULL;
374
375 #ifdef GETIFA_DEBUG
376 if (in_selsrc_debug) {
377 printf("%s: choose src %#" PRIx32 " score ", __func__,
378 ntohl(IA_SIN(best_ifa)->sin_addr.s_addr));
379 in_score_println(best_score, scorelen);
380 }
381 #endif /* GETIFA_DEBUG */
382
383 best_ifa->ifa_seqno = &iss->iss_seqno;
384 return best_ifa;
385 }
386
387 static in_score_src_t
388 name_to_score_src(const char *name)
389 {
390 int i;
391
392 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
393 if (strcmp(score_src_names[i].sn_name, name) == 0)
394 return score_src_names[i].sn_score_src;
395 }
396 return NULL;
397 }
398
399 static const char *
400 score_src_to_name(const in_score_src_t score_src)
401 {
402 int i;
403 for (i = 0; score_src_names[i].sn_name != NULL; i++) {
404 if (score_src == score_src_names[i].sn_score_src)
405 return score_src_names[i].sn_name;
406 }
407 return "<unknown>";
408 }
409
410 static size_t
411 in_get_selectsrc(const struct in_ifselsrc *iss, char *buf0,
412 const size_t buflen0)
413 {
414 int i, rc;
415 char *buf = buf0;
416 const char *delim;
417 size_t buflen = buflen0;
418
419 KASSERT(buflen >= 1);
420
421 for (delim = "", i = 0;
422 i < IN_SCORE_SRC_MAX && iss->iss_score_src[i] != NULL;
423 delim = ",", i++) {
424 rc = snprintf(buf, buflen, "%s%s",
425 delim, score_src_to_name(iss->iss_score_src[i]));
426 if (rc == -1)
427 return buflen0 - buflen;
428 if (rc >= buflen)
429 return buflen0 + rc - buflen;
430 buf += rc;
431 buflen -= rc;
432 }
433 if (buf == buf0)
434 *buf++ = '\0';
435 return buf - buf0;
436 }
437
438 static int
439 in_set_selectsrc(struct in_ifselsrc *iss, char *buf)
440 {
441 int i, s;
442 char *next = buf;
443 const char *name;
444 in_score_src_t score_src;
445 in_score_src_t scorers[IN_SCORE_SRC_MAX];
446
447 memset(&scorers, 0, sizeof(scorers));
448 for (i = 0;
449 (name = strsep(&next, ",")) != NULL && i < IN_SCORE_SRC_MAX;
450 i++) {
451 if (strcmp(name, "") == 0)
452 break;
453 if ((score_src = name_to_score_src(name)) == NULL)
454 return EINVAL;
455 scorers[i] = score_src;
456 }
457 if (i == IN_SCORE_SRC_MAX && name != NULL)
458 return EFBIG;
459 s = splnet();
460 (void)memcpy(iss->iss_score_src, scorers, sizeof(iss->iss_score_src));
461 /* If iss affects a specific interface that used to use
462 * the default policy, increase the sequence number on the
463 * default policy, forcing routes that cache a source
464 * (rt_ifa) found by the default policy to refresh their
465 * cache.
466 */
467 if (iss != &default_iss && iss->iss_score_src[0] == NULL &&
468 scorers[0] != NULL)
469 default_iss.iss_seqno++;
470 iss->iss_seqno++;
471 splx(s);
472 return 0;
473 }
474
475 /*
476 * sysctl helper routine for net.inet.ip.interfaces.<interface>.selectsrc.
477 * Pulls the old value out as a human-readable string, interprets
478 * and records the new value.
479 */
480 static int
481 in_sysctl_selectsrc(SYSCTLFN_ARGS)
482 {
483 char policy[IN_SELECTSRC_LEN];
484 int error;
485 struct sysctlnode node;
486 struct in_ifselsrc *iss;
487
488 node = *rnode;
489 iss = (struct in_ifselsrc *)node.sysctl_data;
490 if (oldp != NULL &&
491 (error = in_get_selectsrc(iss, policy, sizeof(policy))) >= sizeof(policy))
492 return error;
493 node.sysctl_data = &policy[0];
494 error = sysctl_lookup(SYSCTLFN_CALL(&node));
495 if (error || newp == NULL)
496 return (error);
497
498 return in_set_selectsrc(iss, policy);
499 }
500
501 static const struct sysctlnode *
502 in_domifattach_sysctl(struct in_ifsysctl *isc)
503 {
504 int rc;
505 const struct sysctlnode *rnode;
506
507 if ((rc = sysctl_createv(&isc->isc_log, 0, NULL, &rnode,
508 CTLFLAG_READONLY, CTLTYPE_NODE,
509 "interfaces", NULL,
510 NULL, 0, NULL, 0,
511 CTL_NET, PF_INET, IPPROTO_IP, CTL_CREATE,
512 CTL_EOL)) != 0) {
513 printf("%s: could not create net.inet.ip.interfaces, rc = %d\n",
514 __func__, rc);
515 return NULL;
516 }
517 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
518 CTLFLAG_READONLY, CTLTYPE_NODE,
519 isc->isc_ifp->if_xname,
520 SYSCTL_DESCR("interface ip options"),
521 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL)) != 0) {
522 printf("%s: could not create net.inet.ip.interfaces.%s, "
523 "rc = %d\n", __func__, isc->isc_ifp->if_xname, rc);
524 goto err;
525 }
526 if ((rc = sysctl_createv(&isc->isc_log, 0, &rnode, &rnode,
527 CTLFLAG_READWRITE, CTLTYPE_STRING,
528 "selectsrc",
529 SYSCTL_DESCR("source selection policy"),
530 in_sysctl_selectsrc, 0,
531 (void *)isc->isc_selsrc, IN_SELECTSRC_LEN,
532 CTL_CREATE, CTL_EOL)) != 0) {
533 printf(
534 "%s: could not create net.inet.ip.%s.selectsrc, rc = %d\n",
535 __func__, isc->isc_ifp->if_xname, rc);
536 goto err;
537 }
538 return rnode;
539 err:
540 sysctl_teardown(&isc->isc_log);
541 return NULL;
542 }
543
544 void *
545 in_domifattach(struct ifnet *ifp)
546 {
547 struct in_ifsysctl *isc;
548 struct in_ifselsrc *iss;
549
550 isc = (struct in_ifsysctl *)malloc(sizeof(*isc), M_IFADDR,
551 M_WAITOK | M_ZERO);
552
553 iss = (struct in_ifselsrc *)malloc(sizeof(*iss), M_IFADDR,
554 M_WAITOK | M_ZERO);
555
556 memcpy(&iss->iss_score_src[0], &initial_iss.iss_score_src[0],
557 MIN(sizeof(iss->iss_score_src), sizeof(initial_iss.iss_score_src)));
558
559 isc->isc_ifp = ifp;
560 isc->isc_selsrc = iss;
561
562 if (in_domifattach_sysctl(isc) == NULL)
563 goto err;
564
565 return isc;
566 err:
567 free(iss, M_IFADDR);
568 free(isc, M_IFADDR);
569 return NULL;
570 }
571
572 void
573 in_domifdetach(struct ifnet *ifp, void *aux)
574 {
575 struct in_ifsysctl *isc;
576 struct in_ifselsrc *iss;
577
578 if (aux == NULL)
579 return;
580 isc = (struct in_ifsysctl *)aux;
581 iss = isc->isc_selsrc;
582 sysctl_teardown(&isc->isc_log);
583 free(isc, M_IFADDR);
584 free(iss, M_IFADDR);
585 }
586 #endif /* INET */
587