/* $NetBSD: if_laggproto.c,v 1.4 2022/03/31 03:05:41 yamaguchi Exp $ */

/*-
 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
 *
 * Copyright (c)2021 Internet Initiative Japan, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: if_laggproto.c,v 1.4 2022/03/31 03:05:41 yamaguchi Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/evcnt.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pslist.h>
#include <sys/syslog.h>
#include <sys/workqueue.h>

#include <net/if.h>
#include <net/if_ether.h>
#include <net/if_media.h>

#include <net/lagg/if_lagg.h>
#include <net/lagg/if_laggproto.h>

struct lagg_proto_softc {
        struct lagg_softc *psc_softc;
        struct pslist_head psc_ports;
        kmutex_t psc_lock;
        pserialize_t psc_psz;
        size_t psc_ctxsiz;
        void *psc_ctx;
        size_t psc_nactports;
};
/*
 * Locking notes:
 * - Fields of struct lagg_proto_softc are protected by
 *   psc_lock (an adaptive mutex)
 * - psc_ports is protected by pserialize (psc_psz)
 * - Updates of psc_ports are serialized by sc_lock in
 *   struct lagg_softc
 * - Other locking notes are described in if_laggproto.h
 */

struct lagg_failover {
        bool fo_rx_all;
};

struct lagg_portmap {
        struct lagg_port *pm_ports[LAGG_MAX_PORTS];
        size_t pm_nports;
};

struct lagg_portmaps {
        struct lagg_portmap maps_pmap[2];
        size_t maps_activepmap;
};

struct lagg_lb {
        struct lagg_portmaps lb_pmaps;
};

struct lagg_proto_port {
        struct pslist_entry lpp_entry;
        struct lagg_port *lpp_laggport;
        bool lpp_active;
};

#define LAGG_PROTO_LOCK(_psc)   mutex_enter(&(_psc)->psc_lock)
#define LAGG_PROTO_UNLOCK(_psc) mutex_exit(&(_psc)->psc_lock)
#define LAGG_PROTO_LOCKED(_psc) mutex_owned(&(_psc)->psc_lock)

static struct lagg_proto_softc *
                lagg_proto_alloc(lagg_proto, struct lagg_softc *);
static void     lagg_proto_free(struct lagg_proto_softc *);
static void     lagg_proto_insert_port(struct lagg_proto_softc *,
                    struct lagg_proto_port *);
static void     lagg_proto_remove_port(struct lagg_proto_softc *,
                    struct lagg_proto_port *);
static struct lagg_port *
                lagg_link_active(struct lagg_proto_softc *psc,
                    struct lagg_proto_port *, struct psref *);

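/*
 * Helpers for the double-buffered port maps used by the load-balance
 * protocol.  Readers pick up the active map with lagg_portmap_active()
 * under pserialize; the updater builds the other map, publishes it with
 * lagg_portmap_switch() (a release store paired with the readers'
 * consume loads), and then calls pserialize_perform() to wait for
 * readers still using the old map.
 */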
static inline struct lagg_portmap *
lagg_portmap_active(struct lagg_portmaps *maps)
{
        size_t i;

        i = atomic_load_consume(&maps->maps_activepmap);

        return &maps->maps_pmap[i];
}

static inline struct lagg_portmap *
lagg_portmap_next(struct lagg_portmaps *maps)
{
        size_t i;

        i = atomic_load_consume(&maps->maps_activepmap);
        i &= 0x1;
        i ^= 0x1;

        return &maps->maps_pmap[i];
}

static inline void
lagg_portmap_switch(struct lagg_portmaps *maps)
{
        size_t i;

        i = atomic_load_consume(&maps->maps_activepmap);
        i &= 0x1;
        i ^= 0x1;

        atomic_store_release(&maps->maps_activepmap, i);
}

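/*
 * Allocate a protocol softc together with its protocol-private context
 * (struct lagg_failover or struct lagg_lb).  Allocation uses
 * KM_NOSLEEP, so it can fail and callers must handle NULL.
 */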
static struct lagg_proto_softc *
lagg_proto_alloc(lagg_proto pr, struct lagg_softc *sc)
{
        struct lagg_proto_softc *psc;
        size_t ctxsiz;

        switch (pr) {
        case LAGG_PROTO_FAILOVER:
                ctxsiz = sizeof(struct lagg_failover);
                break;
        case LAGG_PROTO_LOADBALANCE:
                ctxsiz = sizeof(struct lagg_lb);
                break;
        default:
                ctxsiz = 0;
        }

        psc = kmem_zalloc(sizeof(*psc), KM_NOSLEEP);
        if (psc == NULL)
                return NULL;

        if (ctxsiz > 0) {
                psc->psc_ctx = kmem_zalloc(ctxsiz, KM_NOSLEEP);
                if (psc->psc_ctx == NULL) {
                        kmem_free(psc, sizeof(*psc));
                        return NULL;
                }

                psc->psc_ctxsiz = ctxsiz;
        }

        PSLIST_INIT(&psc->psc_ports);
        psc->psc_psz = pserialize_create();
        mutex_init(&psc->psc_lock, MUTEX_DEFAULT, IPL_SOFTNET);
        psc->psc_softc = sc;

        return psc;
}

static void
lagg_proto_free(struct lagg_proto_softc *psc)
{

        pserialize_destroy(psc->psc_psz);
        mutex_destroy(&psc->psc_lock);

        if (psc->psc_ctxsiz > 0)
                kmem_free(psc->psc_ctx, psc->psc_ctxsiz);

        kmem_free(psc, sizeof(*psc));
}

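/*
 * Return an active port, preferring the ports at and after pport
 * (which may be NULL) and wrapping around to the head of the list.
 * The returned port is bound to psref and must be released with
 * lagg_port_putref().
 */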
static struct lagg_port *
lagg_link_active(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport, struct psref *psref)
{
        struct lagg_port *lp;
        int s;

        lp = NULL;
        s = pserialize_read_enter();

        for (; pport != NULL;
            pport = PSLIST_READER_NEXT(pport,
            struct lagg_proto_port, lpp_entry)) {
                if (atomic_load_relaxed(&pport->lpp_active)) {
                        lp = pport->lpp_laggport;
                        goto done;
                }
        }

        PSLIST_READER_FOREACH(pport, &psc->psc_ports,
            struct lagg_proto_port, lpp_entry) {
                if (atomic_load_relaxed(&pport->lpp_active)) {
                        lp = pport->lpp_laggport;
                        break;
                }
        }
done:
        if (lp != NULL)
                lagg_port_getref(lp, psref);
        pserialize_read_exit(s);

        return lp;
}

int
lagg_common_allocport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_proto_port *pport;

        KASSERT(LAGG_LOCKED(psc->psc_softc));

        pport = kmem_zalloc(sizeof(*pport), KM_NOSLEEP);
        if (pport == NULL)
                return ENOMEM;

        PSLIST_ENTRY_INIT(pport, lpp_entry);
        pport->lpp_laggport = lp;
        lp->lp_proto_ctx = (void *)pport;
        return 0;
}

void
lagg_common_freeport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_proto_port *pport;

        pport = lp->lp_proto_ctx;
        lp->lp_proto_ctx = NULL;

        kmem_free(pport, sizeof(*pport));
}

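/*
 * Link pport into the port list, keeping the list sorted by ascending
 * lp_prio so that lagg_link_active() finds the port with the lowest
 * priority value first.
 */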
static void
lagg_proto_insert_port(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport)
{
        struct lagg_proto_port *pport0;
        struct lagg_port *lp, *lp0;
        bool insert_after;

        insert_after = false;
        lp = pport->lpp_laggport;

        LAGG_PROTO_LOCK(psc);
        PSLIST_WRITER_FOREACH(pport0, &psc->psc_ports,
            struct lagg_proto_port, lpp_entry) {
                lp0 = pport0->lpp_laggport;
                if (lp0->lp_prio > lp->lp_prio)
                        break;

                if (PSLIST_WRITER_NEXT(pport0,
                    struct lagg_proto_port, lpp_entry) == NULL) {
                        insert_after = true;
                        break;
                }
        }

        if (pport0 == NULL) {
                PSLIST_WRITER_INSERT_HEAD(&psc->psc_ports, pport,
                    lpp_entry);
        } else if (insert_after) {
                PSLIST_WRITER_INSERT_AFTER(pport0, pport, lpp_entry);
        } else {
                PSLIST_WRITER_INSERT_BEFORE(pport0, pport, lpp_entry);
        }
        LAGG_PROTO_UNLOCK(psc);
}

static void
lagg_proto_remove_port(struct lagg_proto_softc *psc,
    struct lagg_proto_port *pport)
{

        LAGG_PROTO_LOCK(psc);
        PSLIST_WRITER_REMOVE(pport, lpp_entry);
        pserialize_perform(psc->psc_psz);
        LAGG_PROTO_UNLOCK(psc);
}

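/*
 * Common start/stop handlers: startport links the port into the
 * protocol's port list and refreshes its link state; stopport unlinks
 * it, waits for concurrent readers, and takes the interface link
 * state DOWN when the last active port goes away.
 */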
void
lagg_common_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_proto_port *pport;

        pport = lp->lp_proto_ctx;
        lagg_proto_insert_port(psc, pport);

        lagg_common_linkstate(psc, lp);
}

void
lagg_common_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_proto_port *pport;
        struct ifnet *ifp;

        pport = lp->lp_proto_ctx;
        lagg_proto_remove_port(psc, pport);

        if (pport->lpp_active) {
                KASSERT(psc->psc_nactports > 0);
                psc->psc_nactports--;

                if (psc->psc_nactports == 0) {
                        ifp = &psc->psc_softc->sc_if;
                        if_link_state_change(ifp, LINK_STATE_DOWN);
                }

                pport->lpp_active = false;
        }
}

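/*
 * Mirror per-port link transitions onto the lagg interface: the first
 * port to become active brings the interface UP, and it goes DOWN
 * again when no active port remains.
 */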
void
lagg_common_linkstate(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_proto_port *pport;
        struct ifnet *ifp;
        bool is_active;

        pport = lp->lp_proto_ctx;
        is_active = lagg_portactive(lp);

        if (pport->lpp_active == is_active)
                return;

        ifp = &psc->psc_softc->sc_if;
        if (is_active) {
                psc->psc_nactports++;
                if (psc->psc_nactports == 1)
                        if_link_state_change(ifp, LINK_STATE_UP);
        } else {
                KASSERT(psc->psc_nactports > 0);
                psc->psc_nactports--;

                if (psc->psc_nactports == 0)
                        if_link_state_change(ifp, LINK_STATE_DOWN);
        }

        atomic_store_relaxed(&pport->lpp_active, is_active);
}

void
lagg_common_detach(struct lagg_proto_softc *psc)
{

        lagg_proto_free(psc);
}

int
lagg_none_attach(struct lagg_softc *sc, struct lagg_proto_softc **pscp)
{

        *pscp = NULL;
        return 0;
}

int
lagg_none_up(struct lagg_proto_softc *psc __unused)
{

        return EBUSY;
}

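/*
 * Failover protocol: all traffic is sent on the first active port in
 * priority order.  fo_rx_all starts out true, so frames received on
 * non-active ports are still accepted until LAGGREQFAIL_RXALL is
 * cleared via lagg_fail_ioctl().
 */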
int
lagg_fail_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
{
        struct lagg_proto_softc *psc;
        struct lagg_failover *fovr;

        psc = lagg_proto_alloc(LAGG_PROTO_FAILOVER, sc);
        if (psc == NULL)
                return ENOMEM;

        fovr = psc->psc_ctx;
        fovr->fo_rx_all = true;

        *xpsc = psc;
        return 0;
}

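/*
 * Transmit every frame on the first active port; if no port is
 * active, count an output error and drop the frame.
 */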
int
lagg_fail_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
{
        struct ifnet *ifp;
        struct lagg_port *lp;
        struct psref psref;

        lp = lagg_link_active(psc, NULL, &psref);
        if (lp == NULL) {
                ifp = &psc->psc_softc->sc_if;
                if_statinc(ifp, if_oerrors);
                m_freem(m);
                return ENOENT;
        }

        lagg_enqueue(psc->psc_softc, lp, m);
        lagg_port_putref(lp, &psref);
        return 0;
}

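/*
 * With fo_rx_all set, accept frames from any port.  Otherwise only
 * frames arriving on the active port pass; everything else is
 * counted as an input error and dropped.
 */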
struct mbuf *
lagg_fail_input(struct lagg_proto_softc *psc, struct lagg_port *lp,
    struct mbuf *m)
{
        struct lagg_failover *fovr;
        struct lagg_port *lp0;
        struct ifnet *ifp;
        struct psref psref;

        fovr = psc->psc_ctx;
        if (atomic_load_relaxed(&fovr->fo_rx_all))
                return m;

        lp0 = lagg_link_active(psc, NULL, &psref);
        if (lp0 == NULL) {
                goto drop;
        }

        if (lp0 != lp) {
                lagg_port_putref(lp0, &psref);
                goto drop;
        }

        lagg_port_putref(lp0, &psref);

        return m;
drop:
        ifp = &psc->psc_softc->sc_if;
        if_statinc(ifp, if_ierrors);
        m_freem(m);
        return NULL;
}

void
lagg_fail_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
    struct laggreqport *resp)
{
        struct lagg_failover *fovr;
        struct lagg_proto_port *pport;
        struct lagg_port *lp0;
        struct psref psref;

        fovr = psc->psc_ctx;
        pport = lp->lp_proto_ctx;

        if (pport->lpp_active) {
                lp0 = lagg_link_active(psc, NULL, &psref);
                if (lp0 == lp) {
                        SET(resp->rp_flags,
                            (LAGG_PORT_ACTIVE |
                            LAGG_PORT_COLLECTING |
                            LAGG_PORT_DISTRIBUTING));
                } else {
                        if (fovr->fo_rx_all) {
                                SET(resp->rp_flags,
                                    LAGG_PORT_COLLECTING);
                        }
                }

                if (lp0 != NULL)
                        lagg_port_putref(lp0, &psref);
        }
}

int
lagg_fail_ioctl(struct lagg_proto_softc *psc, struct laggreqproto *lreq)
{
        struct lagg_failover *fovr;
        struct laggreq_fail *rpfail;
        int error;
        bool set;

        error = 0;
        fovr = psc->psc_ctx;
        rpfail = &lreq->rp_fail;

        switch (rpfail->command) {
        case LAGGIOC_FAILSETFLAGS:
        case LAGGIOC_FAILCLRFLAGS:
                set = (rpfail->command == LAGGIOC_FAILSETFLAGS) ?
                    true : false;

                if (ISSET(rpfail->flags, LAGGREQFAIL_RXALL))
                        fovr->fo_rx_all = set;
                break;
        default:
                error = ENOTTY;
                break;
        }

        return error;
}

int
lagg_lb_attach(struct lagg_softc *sc, struct lagg_proto_softc **xpsc)
{
        struct lagg_proto_softc *psc;
        struct lagg_lb *lb;

        psc = lagg_proto_alloc(LAGG_PROTO_LOADBALANCE, sc);
        if (psc == NULL)
                return ENOMEM;

        lb = psc->psc_ctx;
        lb->lb_pmaps.maps_activepmap = 0;

        *xpsc = psc;
        return 0;
}

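/*
 * Add a port to the load-balance map: copy the active map into the
 * inactive one, append the new port, flip the maps, and wait for
 * readers of the old map to drain.
 */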
void
lagg_lb_startport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_lb *lb;
        struct lagg_portmap *pm_act, *pm_next;
        size_t n;

        lb = psc->psc_ctx;
        lagg_common_startport(psc, lp);

        LAGG_PROTO_LOCK(psc);
        pm_act = lagg_portmap_active(&lb->lb_pmaps);
        pm_next = lagg_portmap_next(&lb->lb_pmaps);

        *pm_next = *pm_act;

        n = pm_next->pm_nports;
        pm_next->pm_ports[n] = lp;

        n++;
        pm_next->pm_nports = n;

        lagg_portmap_switch(&lb->lb_pmaps);
        pserialize_perform(psc->psc_psz);
        LAGG_PROTO_UNLOCK(psc);
}

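/*
 * Remove a port from the load-balance map by rebuilding the inactive
 * map without it, then flip the maps and wait for readers before the
 * common stop processing runs.
 */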
void
lagg_lb_stopport(struct lagg_proto_softc *psc, struct lagg_port *lp)
{
        struct lagg_lb *lb;
        struct lagg_portmap *pm_act, *pm_next;
        size_t i, n;

        lb = psc->psc_ctx;

        LAGG_PROTO_LOCK(psc);
        pm_act = lagg_portmap_active(&lb->lb_pmaps);
        pm_next = lagg_portmap_next(&lb->lb_pmaps);
        n = 0;

        for (i = 0; i < pm_act->pm_nports; i++) {
                if (pm_act->pm_ports[i] == lp)
                        continue;

                pm_next->pm_ports[n] = pm_act->pm_ports[i];
                n++;
        }

        /* record the shrunken port count in the map being published */
        pm_next->pm_nports = n;

        lagg_portmap_switch(&lb->lb_pmaps);
        pserialize_perform(psc->psc_psz);
        LAGG_PROTO_UNLOCK(psc);

        lagg_common_stopport(psc, lp);
}

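/*
 * Choose an output port by hashing the frame and indexing the active
 * port map, falling back to the next active port when the hashed
 * port is down.  The frame is dropped with an output error if no
 * port is usable.
 */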
int
lagg_lb_transmit(struct lagg_proto_softc *psc, struct mbuf *m)
{
        struct lagg_lb *lb;
        struct lagg_portmap *pm;
        struct lagg_port *lp, *lp0;
        struct ifnet *ifp;
        struct psref psref;
        uint32_t hash;
        int s;

        lb = psc->psc_ctx;
        hash = lagg_hashmbuf(psc->psc_softc, m);

        s = pserialize_read_enter();

        pm = lagg_portmap_active(&lb->lb_pmaps);
        if (__predict_false(pm->pm_nports == 0)) {
                /* empty port map; avoid modulo by zero and drop below */
                lp = NULL;
        } else {
                hash %= pm->pm_nports;
                lp0 = pm->pm_ports[hash];
                lp = lagg_link_active(psc, lp0->lp_proto_ctx, &psref);
        }

        pserialize_read_exit(s);

        if (__predict_false(lp == NULL)) {
                ifp = &psc->psc_softc->sc_if;
                if_statinc(ifp, if_oerrors);
                m_freem(m);
                return ENOENT;
        }

        lagg_enqueue(psc->psc_softc, lp, m);
        lagg_port_putref(lp, &psref);

        return 0;
}

struct mbuf *
lagg_lb_input(struct lagg_proto_softc *psc __unused,
    struct lagg_port *lp __unused, struct mbuf *m)
{

        return m;
}

void
lagg_lb_portstat(struct lagg_proto_softc *psc, struct lagg_port *lp,
    struct laggreqport *resp)
{
        struct lagg_proto_port *pport;

        pport = lp->lp_proto_ctx;

        if (pport->lpp_active) {
                SET(resp->rp_flags, LAGG_PORT_ACTIVE |
                    LAGG_PORT_COLLECTING | LAGG_PORT_DISTRIBUTING);
        }
}