/*	$NetBSD: altq_rmclass.c,v 1.16 2006/10/24 02:48:04 mrg Exp $	*/
/*	$KAME: altq_rmclass.c,v 1.19 2005/04/13 03:44:25 suz Exp $	*/

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer@eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq@ee.lbl.gov
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_rmclass.c,v 1.16 2006/10/24 02:48:04 mrg Exp $");

#ident "@(#)rm_class.c 1.48 97/12/05 SMI"

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#endif

#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#ifdef ALTQ3_COMPAT
#include <sys/kernel.h>
#endif

#include <net/if.h>
#ifdef ALTQ3_COMPAT
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#endif

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied(struct rm_class *, struct timeval *);
static void	rmc_wrr_set_weights(struct rm_ifdat *);
static void	rmc_depth_compute(struct rm_class *);
static void	rmc_depth_recompute(rm_class_t *);

static mbuf_t	*_rmc_wrr_dequeue_next(struct rm_ifdat *, int);
static mbuf_t	*_rmc_prr_dequeue_next(struct rm_ifdat *, int);

static int	_rmc_addq(rm_class_t *, mbuf_t *);
static void	_rmc_dropq(rm_class_t *);
static mbuf_t	*_rmc_getq(rm_class_t *);
static mbuf_t	*_rmc_pollq(rm_class_t *);

static int	rmc_under_limit(struct rm_class *, struct timeval *);
static void	rmc_tl_satisfied(struct rm_ifdat *, struct timeval *);
static void	rmc_drop_action(struct rm_class *);
static void	rmc_restart(struct rm_class *);
static void	rmc_root_overlimit(struct rm_class *, struct rm_class *);

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class being borrowed from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * now the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
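 *
 * A rough worked example (illustrative numbers only, taking
 * RM_FILTER_GAIN = 5, so g = 31/32): for a class given f = 50% of a
 * 10Mb/s ethernet (nsPerByte = 800, so nsecPerByte = 1600), with an
 * average packet size s = 1000 bytes and a burst allowance b = 16:
 *
 *	ptime = 1000 * 800 * (1 - .5) / .5 = 800000 ns
 *	g^b ~= 0.60, so maxidle ~= 800000 * (1 - 0.60) / 0.60 ~= 530000 ns,
 *	and the scaled value passed in is
 *	530000 * 2^5 * 8 / (1000 * 1600) ~= 85;
 *	g^(b-1) ~= 0.62, so offtime ~= 800000 * (1 + 32 * 0.61) ~= 16400000 ns,
 *	and the scaled value passed in is
 *	16400000 * 8 / (1000 * 1600) ~= 82.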
 */
struct rm_class *
rmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*action)(rm_class_t *, rm_class_t *), int maxq,
    struct rm_class *parent, struct rm_class *borrow, u_int maxidle,
    int minidle, u_int offtime, int pktsize, int flags)
{
	struct rm_class	*cl;
	struct rm_class	*peer;
	int		 s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RED not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
#ifdef ALTQ_DEBUG
		printf("rmc_newclass: RIO not configured for CBQ!\n");
#endif
		return (NULL);
	}
#endif

	cl = malloc(sizeof(struct rm_class), M_DEVBUF, M_WAITOK|M_ZERO);
	if (cl == NULL)
		return (NULL);
	CALLOUT_INIT(&cl->callout_);

	cl->q_ = malloc(sizeof(class_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (cl->q_ == NULL) {
		free(cl, M_DEVBUF);
		return (NULL);
	}

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0,
			    qlimit(cl->q_) * 10/100,
			    qlimit(cl->q_) * 30/100,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
			    red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}
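
#if 0
/*
 * Example (sketch only, never compiled): creating a leaf class that gets
 * 50% of a 10Mb/s link (nsecPerByte = 1600) at priority 1, borrowing from
 * the root.  The numeric parameters are hypothetical; real callers (e.g.
 * the cbq glue code) derive maxidle/minidle/offtime from the formulas in
 * the comment above rmc_newclass().
 */
	struct rm_class *cl;

	cl = rmc_newclass(1, ifd, 1600, rmc_delay_action, 64 /* maxq */,
	    ifd->root_ /* parent */, ifd->root_ /* borrow */,
	    85 /* maxidle */, -6 /* minidle */, 82 /* offtime */,
	    1000 /* pktsize */, 0 /* flags */);
	if (cl == NULL)
		return (ENOMEM);
#endif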

int
rmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle,
    int minidle, u_int offtime, int pktsize __unused)
{
	struct rm_ifdat	*ifd;
	u_int		 old_allotment;
	int		 s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round robin weights for the CBQ weighted round
 *	robin algorithm.
 *
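 *	For example (illustrative numbers only): two classes at the same
 *	priority on a 10Mb/s link, each with allotment_ = 625000 bytes/sec
 *	(50% each) and maxpkt_ = 1500, give
 *	M_[i] = (625000 + 625000) / (2 * 1500) = 416 and
 *	w_allotment_ = 625000 / 416 ~= 1502 bytes per WRR round.
 *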
 * Returns:	NONE
 */

static void
rmc_wrr_set_weights(struct rm_ifdat *ifd)
{
	int		 i;
	struct rm_class	*cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
				(ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
						ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(struct rm_ifdat *ifd, int pri)
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */

static void
rmc_depth_compute(struct rm_class *cl)
{
	rm_class_t	*t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t	*p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 * Returns:	NONE
 */

void
rmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl)
{
	struct rm_class	*p, *head, *previous;
	int		 s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	 re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	free(cl->q_, M_DEVBUF);
	free(cl, M_DEVBUF);
}


/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	NONE
 */


void
rmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte,
    void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle,
    int minidle, u_int offtime, int flags)
{
	int	i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	(void)memset((char *)ifd, 0, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
	    nsecPerByte,
	    rmc_root_overlimit, maxq, 0, 0,
	    maxidle, minidle, offtime,
	    0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}
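
#if 0
/*
 * Example (sketch only, never compiled): the enqueue side as seen from
 * driver/glue code, assuming the packet has already been classified to
 * class "cl".  On a drop, rmc_queue_packet() has already freed the mbuf.
 */
	if (rmc_queue_packet(cl, m) != 0)
		return (ENOBUFS);	/* dropped (and freed) by the class */
	return (0);
#endif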

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 *	packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, a pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}
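
#if 0
/*
 * Example (sketch only, never compiled): how a driver might use the
 * poll/dequeue interface.  A poll must be followed by a remove of the
 * same packet, and rmc_update_class_util() runs the estimators once the
 * (estimated) transmission completes.  The driver_* names are
 * hypothetical.
 */
	mbuf_t *m;

	if ((m = rmc_dequeue_next(ifd, ALTDQ_POLL)) != NULL &&
	    driver_has_room(ifp, m)) {
		m = rmc_dequeue_next(ifd, ALTDQ_REMOVE);
		driver_start_output(ifp, m);
		rmc_update_class_util(ifd);
	}
#endif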

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
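 * (here (t >> 10) + (t >> 16) + (t >> 17) = t * (2^-10 + 2^-16 + 2^-17)
 *  = t * 0.00099945, about 0.055% below a true divide by 1000.)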
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 * Returns:	NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto(struct timeval *);

static int
hzto(struct timeval *tv)
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 * If the delay action is the result of the borrow class being overlimit,
 * then delay for the offtime of the borrowing class that is overlimit.
 *
 * Returns:	NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	ndelay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, ndelay);
#ifndef BORROW_OFFTIME
	ndelay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			ndelay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (ndelay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splnet so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 * Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	s = splnet();
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *
 * Returns:	NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl __unused,
    struct rm_class *borrow __unused)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  These localize, in one place, the
 * effects on the rest of the code of whether queues are RED queues or
 * droptail queues.
 */

static int
_rmc_addq(rm_class_t *cl, mbuf_t *m)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(rm_class_t *cl)
{
	mbuf_t	*m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(rm_class_t *cl)
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(rm_class_t *cl)
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(void *func)
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(int counter)
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

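/*
 * The class queues are circular, singly-linked lists of mbufs: qtail(q)
 * points at the most recently added packet, and that packet's m_nextpkt
 * points back around to the head, so both ends of the queue are reachable
 * from the single tail pointer.  The routines below maintain this
 * invariant.
 */
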
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = arc4random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(class_queue_t *q)
{
	mbuf_t *m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */