/*	$NetBSD: altq_rmclass.c,v 1.4 2001/04/13 23:29:56 thorpej Exp $	*/
/*	$KAME: altq_rmclass.c,v 1.9 2000/12/14 08:12:46 thorpej Exp $	*/

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer (at) eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq (at) ee.lbl.gov
 */

#ident "@(#)rm_class.c 1.48 97/12/05 SMI"

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied __P((struct rm_class *, struct timeval *));
static void	rmc_wrr_set_weights __P((struct rm_ifdat *));
static void	rmc_depth_compute __P((struct rm_class *));
static void	rmc_depth_recompute __P((rm_class_t *));

static mbuf_t	*_rmc_wrr_dequeue_next __P((struct rm_ifdat *, int));
static mbuf_t	*_rmc_prr_dequeue_next __P((struct rm_ifdat *, int));

static int	_rmc_addq __P((rm_class_t *, mbuf_t *));
static void	_rmc_dropq __P((rm_class_t *));
static mbuf_t	*_rmc_getq __P((rm_class_t *));
static mbuf_t	*_rmc_pollq __P((rm_class_t *));

static int	rmc_under_limit __P((struct rm_class *, struct timeval *));
static void	rmc_tl_satisfied __P((struct rm_ifdat *, struct timeval *));
static void	rmc_drop_action __P((struct rm_class *));
static void	rmc_restart __P((struct rm_class *));
static void	rmc_root_overlimit __P((struct rm_class *, struct rm_class *));

#define	BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define	ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * the heuristics for setting the top-level variable (cutoff_) become:
 *	1. if a packet arrives for a not-overlimit class, set cutoff
 *	   to the depth of the class.
 *	2. if cutoff is i, and a packet arrives for an overlimit class
 *	   with an underlimit ancestor at a lower level than i (say j),
 *	   then set cutoff to j.
 *	3. at scheduling a packet, if there is no underlimit class
 *	   due to the current cutoff level, increase cutoff by 1 and
 *	   then try to schedule again.
 */
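
/*
 * Worked example of the cutoff_ heuristics above (editor's note,
 * assuming a 3-level tree with the root at depth 2): after
 * reset_cutoff() sets cutoff_ to RM_MAXDEPTH, a packet arriving for
 * an overlimit leaf whose parent (depth 1) is underlimit lowers
 * cutoff_ to 1 (rule 2), so leaves may borrow only up to depth 1; a
 * later arrival for an underlimit leaf lowers cutoff_ to 0 (rule 1),
 * disabling borrowing; and if the scheduler then finds nothing to
 * send at cutoff_ == 0, it increments cutoff_ and rescans (rule 3).
 */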

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte	is the data rate of the interface in nanoseconds/byte.
 *		E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *		than 100% of the bandwidth, this number should be the
 *		'effective' rate for the class.  Let f be the
 *		bandwidth fraction allocated to this class, and let
 *		nsPerByte be the data rate of the output link in
 *		nanoseconds/byte.  Then nsecPerByte is set to
 *		nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *		for a class that gets 50% of an ethernet's bandwidth.
 *
 * action	the routine to call when the class is over limit.
 *
 * maxq		max allowable queue size for class (in packets).
 *
 * parent	parent class pointer.
 *
 * borrow	class to borrow from (should be either 'parent' or null).
 *
 * maxidle	max value allowed for class 'idle' time estimate (this
 *		parameter determines how large an initial burst of packets
 *		can be before overlimit action is invoked).
 *
 * offtime	how long 'delay' action will delay when class goes over
 *		limit (this parameter determines the steady-state burst
 *		size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *	ptime = s * nsPerByte * (1 - f) / f
 *	maxidle = ptime * (1 - g^b) / g^b
 *	minidle = -ptime * (1 / (f - 1))
 *	offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
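
/*
 * Editor's sketch (not part of the build): a user-space computation of
 * the scaled maxidle/offtime values described above, for a class that
 * gets 50% of a 10Mb/s ethernet (nsPerByte = 800, so nsecPerByte =
 * 1600), average packet size s = 1000 bytes, allowed burst b = 16
 * packets, and RM_FILTER_GAIN = 5 (g = 1 - 2^-5).  All names here are
 * hypothetical; a CBQ configuration tool would normally do this.
 */
#if 0 /* illustrative sketch only */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double f = 0.5, nsPerByte = 800.0, s = 1000.0, b = 16.0;
	double g = 1.0 - 1.0 / 32.0;		/* 2^-RM_FILTER_GAIN = 1/32 */
	double nsecPerByte = nsPerByte / f;	/* effective rate: 1600 */
	double ptime, maxidle, offtime;

	ptime = s * nsPerByte * (1.0 - f) / f;
	maxidle = ptime * (1.0 - pow(g, b)) / pow(g, b);
	offtime = ptime * (1.0 + 1.0 / (1.0 - g)
	    * (1.0 - pow(g, b - 1.0)) / pow(g, b - 1.0));

	/* apply the scale factors described above */
	maxidle = maxidle * 32.0 * 8.0 / (1000.0 * nsecPerByte);
	offtime = offtime * 8.0 / (1000.0 * nsecPerByte);

	printf("maxidle=%g offtime=%g\n", maxidle, offtime);
	return (0);
}
#endif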

struct rm_class *
rmc_newclass(pri, ifd, nsecPerByte, action, maxq, parent, borrow,
	     maxidle, minidle, offtime, pktsize, flags)
	int		pri;
	struct rm_ifdat	*ifd;
	u_int		nsecPerByte;
	void		(*action)(rm_class_t *, rm_class_t *);
	int		maxq;
	struct rm_class	*parent;
	struct rm_class	*borrow;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		pktsize;
	int		flags;
{
	struct rm_class	*cl;
	struct rm_class	*peer;
	int		 s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
		printf("rmc_newclass: RED not configured for CBQ!\n");
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
		printf("rmc_newclass: RIO not configured for CBQ!\n");
		return (NULL);
	}
#endif

	MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
	       M_DEVBUF, M_WAITOK);
	if (cl == NULL)
		return (NULL);
	bzero(cl, sizeof(struct rm_class));
	CALLOUT_INIT(&cl->callout_);
	MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
	       M_DEVBUF, M_WAITOK);
	if (cl->q_ == NULL) {
		FREE(cl, M_DEVBUF);
		return (NULL);
	}
	bzero(cl->q_, sizeof(class_queue_t));

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0, 0, 0,
					     red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
						      red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splnet();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}

int
rmc_modclass(cl, nsecPerByte, maxq, maxidle, minidle, offtime, pktsize)
	struct rm_class	*cl;
	u_int		nsecPerByte;
	int		maxq;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		pktsize;
{
	struct rm_ifdat	*ifd;
	u_int		 old_allotment;
	int		 s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splnet();
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (0);
}

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round-robin weights for the CBQ weighted
 *	round-robin algorithm.
 *
 * Returns:	NONE
 */
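
/*
 * Worked example (editor's note, assumed numbers): with
 * ifd->maxpkt_ = 1514 and two classes at one priority whose
 * allotments are 625000 and 312500 bytes/sec (so alloc_[i] = 937500,
 * num_[i] = 2), M_[i] = 937500 / (2 * 1514) = 309, giving
 * w_allotment_ values of 625000 / 309 = 2022 and 312500 / 309 = 1011
 * bytes per round, i.e. weights in the 2:1 ratio of the configured
 * rates.
 */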

static void
rmc_wrr_set_weights(ifd)
	struct rm_ifdat	*ifd;
{
	int		 i;
	struct rm_class	*cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
				(ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
						ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(ifd, pri)
	struct rm_ifdat	*ifd;
	int		pri;
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 * Returns:	NONE
 */

static void
rmc_depth_compute(cl)
	struct rm_class	*cl;
{
	rm_class_t	*t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 * Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t	*p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 * Returns:	NONE
 */

void
rmc_delete_class(ifd, cl)
	struct rm_ifdat	*ifd;
	struct rm_class	*cl;
{
	struct rm_class	*p, *head, *previous;
	int		 s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splnet();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	 re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	FREE(cl->q_, M_DEVBUF);
	FREE(cl, M_DEVBUF);
}


/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 * Returns:	NONE
 */
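
/*
 * Editor's sketch (not part of the build): roughly how a hypothetical
 * driver would wire these routines together.  The xx_* names, the
 * queue limit of 50 packets, and the zero idle/offtime parameters are
 * all made up for illustration.
 */
#if 0 /* illustrative sketch only */
static struct rm_ifdat xx_ifdat;
static void xx_restart(struct ifaltq *);

static void
xx_attach_cbq(struct ifaltq *ifq)
{
	/* 10Mb/s link: 800 ns/byte; at most 1 packet downstream */
	rmc_init(ifq, &xx_ifdat, 800, xx_restart, 50, 1,
		 /* maxidle */ 0, /* minidle */ 0, /* offtime */ 0,
		 RMCF_WRR);
}

static void
xx_output(struct rm_class *cl, mbuf_t *m)
{
	if (rmc_queue_packet(cl, m) == 0) {
		mbuf_t *next;

		if ((next = rmc_dequeue_next(&xx_ifdat, ALTDQ_REMOVE))
		    != NULL) {
			/*
			 * hand 'next' to the hardware; the completion
			 * path then calls rmc_update_class_util().
			 */
		}
	}
}
#endif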

void
rmc_init(ifq, ifd, nsecPerByte, restart, maxq, maxqueued, maxidle,
	 minidle, offtime, flags)
	struct ifaltq	*ifq;
	struct rm_ifdat	*ifd;
	u_int		nsecPerByte;
	void		(*restart)(struct ifaltq *);
	int		maxq, maxqueued;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		flags;
{
	int		i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
				       nsecPerByte,
				       rmc_root_overlimit, maxq, 0, 0,
				       maxidle, minidle, offtime,
				       0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 * Returns:	0 on successful queueing
 *		-1 when packet drop occurs
 */
int
rmc_queue_packet(cl, m)
	struct rm_class	*cl;
	mbuf_t		*m;
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob',
						 ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(ifd, now)
	struct rm_ifdat	*ifd;
	struct timeval	*now;
{
	int		 i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(cl, now)
	struct rm_class	*cl;
	struct timeval	*now;
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(cl, now)
	struct rm_class	*cl;
	struct timeval	*now;
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this
				   class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou',
					 ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed to
 *	packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */
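
/*
 * Worked example (editor's note, assumed numbers): suppose a class
 * has w_allotment_ = 1500 and three 1000-byte packets queued.  With
 * bytes_alloc_ = 1500 it may send the first packet (1500 -> 500); it
 * is still the active class and sends the second (500 -> -500); only
 * then does the active pointer advance, and the class must wait for
 * its allotment to be replenished on a later round before the third
 * packet goes out.
 */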

static mbuf_t *
_rmc_wrr_dequeue_next(ifd, op)
	struct rm_ifdat	*ifd;
	int		op;
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				} else if (first == NULL &&
					   cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(ifd, op)
	struct rm_ifdat	*ifd;
	int		op;
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 * Returns:	NULL, if a packet is not available or if all
 *		classes are overlimit.
 *
 *		Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(ifd, mode)
	struct rm_ifdat	*ifd;
	int		mode;
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
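/*
 * Editor's note: the macro approximates t/1000 as
 * t/1024 + t/65536 + t/131072; the combined multiplier is
 * 1/1024 + 1/65536 + 1/131072 = 0.00099945..., i.e. the 0.000999
 * factor quoted above, an error of only about 0.055% versus a true
 * divide by 1000.
 */
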
void
rmc_update_class_util(ifd)
	struct rm_ifdat	*ifd;
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo',
				 cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) ||
		    TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
				 borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) ||
		    TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
				 borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 * Returns:	NONE
 */

static void
rmc_drop_action(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto __P((struct timeval *));

static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 * If the delay action is the result of the borrow class being overlimit,
 * then delay for the offtime of the borrowing class that is overlimit.
 *
 * Returns:	NONE
 */

void
rmc_delay_action(cl, borrow)
	struct rm_class	*cl, *borrow;
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay,
				     &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has to
 *	know about system locking conventions.  Under BSD, locking is done
 *	by raising IPL to splnet so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 * Returns:	NONE
 */

static void
rmc_restart(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	s = splnet();
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link-sharing structure.
 *
 * Returns:	NONE
 */

static void
rmc_root_overlimit(cl, borrow)
	struct rm_class	*cl, *borrow;
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code whether queues are red queues or droptail
 *	queues.
 */
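
/*
 * Editor's note on the representation assumed by _addq() and friends
 * below: class_queue_t keeps only a tail pointer into a circular,
 * singly-linked list of mbufs, so the head is always
 * qtail(q)->m_nextpkt.  A three-packet queue looks like
 *
 *	qtail(q) --> m3 --> m1 --> m2 --> m3 (back to the tail)
 *
 * which makes both enqueue-at-tail and dequeue-at-head O(1) with a
 * single pointer.
 */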

static int
_rmc_addq(cl, m)
	rm_class_t	*cl;
	mbuf_t		*m;
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(cl)
	rm_class_t	*cl;
{
	mbuf_t	*m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(cl)
	rm_class_t	*cl;
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(cl)
	rm_class_t	*cl;
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from the Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(func)
	void *func;
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(counter)
	int counter;
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i = 0; i < 20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n", *p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */

#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

void
_addq(q, m)
	class_queue_t	*q;
	mbuf_t		*m;
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(q)
	class_queue_t	*q;
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(q)
	class_queue_t	*q;
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(q)
	class_queue_t	*q;
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

void
_removeq(q, m)
	class_queue_t	*q;
	mbuf_t		*m;
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(q)
	class_queue_t	*q;
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */