/*	$NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $	*/
/*	$KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer (at) eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq (at) ee.lbl.gov
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.26 2021/07/21 06:41:22 ozaki-r Exp $");

/* #ident "@(#)rm_class.c  1.48  97/12/05 SMI" */

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif

#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#ifdef ALTQ3_COMPAT
#include <sys/kernel.h>
#endif
#include <sys/cprng.h>

#include <net/if.h>
#ifdef ALTQ3_COMPAT
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

#define	PSEC_TO_NSEC(t)	((t) / 1000)

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timespec *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int	_rmc_addq(rm_class_t *, mbuf_t *);
static void	_rmc_dropq(rm_class_t *);
static mbuf_t	*_rmc_getq(rm_class_t *);
static mbuf_t	*_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timespec *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timespec *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class being borrowed from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * now the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */
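
/*
 * Illustration (editorial, not from the original sources): assume a
 * three-level tree with the root at depth 2, an "agency" class at
 * depth 1 and leaf classes at depth 0.  A packet arriving for an
 * underlimit leaf sets cutoff_ to 0 (rule 1), so no class may borrow.
 * If a packet then arrives for an overlimit leaf whose agency parent
 * is underlimit, cutoff_ is raised to 1 (rule 2), letting that leaf
 * borrow from its parent.  If the scheduler still finds nothing to
 * send, cutoff_ is incremented step by step (rule 3) until it reaches
 * the root depth, at which point classes may borrow all the way up.
 */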

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * psecPerByte	is the data rate of the interface in picoseconds/byte.
 *		E.g., 800000 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then psecPerByte is set to
 *		1000 * nsPerByte / f.  E.g., 1600000 (= 1000 * 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
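
/*
 * Worked example (editorial, not from the original sources; assumes
 * RM_FILTER_GAIN = 5, i.e. g = 31/32): for a 10Mb/s ethernet
 * (nsPerByte = 800), average packet size s = 1000 bytes, bandwidth
 * fraction f = 0.5 and burst size b = 16:
 *
 *	ptime   = 1000 * 800 * (1 - .5) / .5           = 800000 ns
 *	g^16    ~= 0.602,  g^15 ~= 0.621
 *	maxidle = 800000 * (1 - .602) / .602           ~= 529000 ns
 *	offtime = 800000 * (1 + 32 * (1 - .621)/.621)  ~= 16.4 ms
 *
 * These raw values are then rescaled as described above before being
 * passed to rmc_newclass().
 */
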
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_long psecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class *cl;
	struct rm_class *peer;
	int s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_WAITOK|M_ZERO);
	if (cl == NULL)
		return (NULL);
	CALLOUT_INIT(&cl->callout_);

	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (cl->q_ == NULL) {
		free(cl, M_DEVBUF);
		return (NULL);
	}

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = (u_int)(RM_PS_PER_SEC / psecPerByte); /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ps_per_byte_ = psecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		/* average packet time in usec */
		red_pkttime = psecPerByte * pktsize / 1000 / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}

int
rmc_modclass(struct rm_class *cl, u_long psecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize)
{
	struct rm_ifdat *ifd;
	u_int old_allotment;
	int s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	cl->allotment_ = (u_int)(RM_PS_PER_SEC / psecPerByte); /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ps_per_byte_ = psecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = ((long)minidle * (long)psecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = ((long)maxidle * (long)psecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = (((long)offtime * (long)psecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 * the appropriate round-robin weights for the CBQ weighted round-robin
 * algorithm.
 *
 * Returns: NONE
 */
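
/*
 * Numeric sketch (editorial, not from the original sources): suppose
 * priority i has two classes with allotments of 750000 and 250000
 * bytes/sec (alloc_[i] = 1000000, num_[i] = 2) on a link with
 * maxpkt_ = 1500.  Then M_[i] = 1000000 / (2 * 1500) = 333, and the
 * weighted allotments become w_allotment_ = 750000/333 ~= 2252 and
 * 250000/333 ~= 750 bytes per round, preserving the 3:1 service ratio
 * without a division in the dequeue path.
 */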

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int i;
	struct rm_class *cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
			    (ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
					    ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 * appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t *t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 * the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t *p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t *t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 * function deletes a class from the link-sharing structure and frees
 * all resources associated with the class.
 *
 * Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class *p, *head, *previous;
	int s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 * re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}

/*
 * int
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'psecPerByte'
 *	gives the link speed (inverse of bandwidth) in picoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	0 on success
 */
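
/*
 * For illustration (editorial, not from the original sources): with
 * RM_PS_PER_SEC = 10^12 (picoseconds per second), a caller would
 * derive psecPerByte from the link bandwidth in bits/sec as
 *
 *	psecPerByte = RM_PS_PER_SEC / (bandwidth_bps / 8);
 *
 * e.g., a 100Mb/s link gives 10^12 / 12500000 = 80000 ps/byte.
 */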

int
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_long psecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	mtu = ifq->altq_ifp->if_mtu;
	if (mtu < 1) {
		printf("altq: %s: invalid MTU (interface not initialized?)\n",
		    ifq->altq_ifp->if_xname);
		return (EINVAL);
	}

	(void)memset((char *)ifd, 0, sizeof (*ifd));
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ps_per_byte_ = psecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * psecPerByte / 1000 / 1000 * 16;
	if ((long)mtu * psecPerByte > (long)10 * 1000000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    psecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return (ENOMEM);
	}
	ifd->root_->depth_ = 0;

	return (0);
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timespec now;
	struct rm_ifdat *ifd = cl->ifdat_;
	int cpri = cl->pri_;
	int is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TS_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			    borrow->depth_ < ifd->cutoff_) {
				if (TS_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TS_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
				    cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timespec *now)
{
	int i;
	rm_class_t *p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timespec *now)
{
	rm_class_t *p;

	if (cl == NULL)
		return (1);
	if (TS_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timespec *now)
{
	rm_class_t *p = cl;
	rm_class_t *top;
	struct rm_ifdat *ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TS_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TS_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 * packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */
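
/*
 * Service-order sketch (editorial, not from the original sources): with
 * two same-priority classes A (w_allotment_ 3000) and B (w_allotment_
 * 1500) and 1500-byte packets, one round serves A, A, B; bytes_alloc_
 * is debited per packet, and the active pointer only advances once a
 * class's allocation is exhausted.  The 'deficit' second pass below
 * handles the case where a class is underlimit but currently has no
 * positive allocation.
 */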

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class *cl = NULL, *first = NULL;
	u_int deficit;
	int cpri;
	mbuf_t *m;
	struct timespec now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t *m;
	int cpri;
	struct rm_class *cl, *first = NULL;
	struct timespec now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timespec *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}
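
/*
 * Driver-side usage sketch (editorial, not from the original sources;
 * 'ifd' stands for a hypothetical per-driver struct rm_ifdat).  A
 * driver typically polls, removes the same packet it polled, and then
 * reports completion so the estimators run:
 */
#if 0
	mbuf_t *m;

	if ((m = rmc_dequeue_next(ifd, ALTDQ_POLL)) != NULL) {
		m = rmc_dequeue_next(ifd, ALTDQ_REMOVE); /* same mbuf */
		/* ... hand 'm' to the hardware ... */
		rmc_update_class_util(ifd); /* on (estimated) completion */
	}
#endif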

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
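/*
 * Editorial note: the shifts sum to 2^-10 + 2^-16 + 2^-17 =
 * 0.0009765625 + 0.0000152588 + 0.0000076294 ~= 0.0009994507,
 * i.e. within about 0.06% of an exact divide by 1000.
 */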
/* Don't worry.  Recent compilers don't use div. */
#define	PSEC_TO_USEC(t)	((t) / 1000 / 1000)
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	long idle, avgidle, pktlen;
	long pkt_time;
	long tidle;
	rm_class_t *cl, *cl0, *borrowed;
	rm_class_t *borrows;
	struct timespec *nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	cl0 = cl;
	pktlen = (long)ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in nsec */
#if 1  /* use approximation */
	pkt_time = (long)ifd->curlen_[ifd->qo_] * (long)ifd->ps_per_byte_;
	pkt_time = PSEC_TO_NSEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TS_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TS_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TS_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TS_LT(nowp, &ifd->ifnow_)) {
		TS_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TS_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TS_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TS_DELTA returns 2000000000 ns when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in nsec */
#if 1  /* use approximation */
		pkt_time = pktlen * (long)cl->ps_per_byte_;
		pkt_time = PSEC_TO_NSEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
			    (((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TS_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL && cl != cl0) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TS_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TS_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 * Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
static int tvhzto(struct timeval *);

static int
tvhzto(struct timeval *tv)
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of the borrow class being overlimit,
 *	then delay for the offtime of the borrowing class that is overlimit.
 *
 * Returns:	NONE
 */
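
/*
 * Example (editorial, not from the original sources): if the class went
 * overlimit at overtime_ = T and its undertime_ is T + 3 ms, then
 * ndelay = 3 ms.  With BORROW_OFFTIME and a borrow class whose offtime_
 * is 16 ms, minus a last_pkttime_ of 1.2 ms, extradelay = 14.8 ms is
 * added to both undertime_ and ndelay, so the restart callout fires
 * after roughly 17.8 ms (but never sooner than two clock ticks).
 */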

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int t;
	long ndelay, extradelay;

	cl->stats_.overactions++;
	if (borrow != NULL)
		TS_DELTA(&borrow->undertime_, &cl->overtime_, ndelay);
	else
		TS_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
	ndelay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TS_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			ndelay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (NSEC_TO_USEC(ndelay) > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = tvhzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = tshzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
		    (timeout_t *)rmc_restart, (void *)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under BSD, locking is done
 *	by raising IPL to splnet so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 * Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat *ifd = cl->ifdat_;
	int s;

	s = splnet();
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *
 * Returns:	NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl,
    struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code whether queues are red queues or droptail
 *	queues.
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t *m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace *cbqtrace_ptr = NULL;
int cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from the Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void *func;
	char *name;
} rmc_funcs[] =
{
	rmc_init, "rmc_init",
	rmc_queue_packet, "rmc_queue_packet",
	rmc_under_limit, "rmc_under_limit",
	rmc_update_class_util, "rmc_update_class_util",
	rmc_delay_action, "rmc_delay_action",
	rmc_restart, "rmc_restart",
	_rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next",
	NULL, NULL
};

static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(int counter)
{
	int i, *p;
	char *cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

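/*
 * Editorial note: these queues are circular singly-linked lists
 * threaded through m_nextpkt.  qtail(q) points at the last packet and
 * qtail(q)->m_nextpkt at the head, so both enqueue at the tail and
 * dequeue at the head are O(1); an empty queue has qtail(q) == NULL.
 */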
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t *m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t *m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf *m;
	int i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = cprng_fast32() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t *m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
	mbuf_t *m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */