/*	$KAME: altq_rmclass.c,v 1.9 2000/12/14 08:12:46 thorpej Exp $	*/

/*
 * Copyright (c) 1991-1997 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Network Research
 *	Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * LBL code modified by speer (at) eng.sun.com, May 1997.
 * For questions and/or comments, please send mail to cbq (at) ee.lbl.gov
 */

#ident "@(#)rm_class.c  1.48     97/12/05 SMI"

#if defined(__FreeBSD__) || defined(__NetBSD__)
#include "opt_altq.h"
#if (__FreeBSD__ != 2)
#include "opt_inet.h"
#ifdef __FreeBSD__
#include "opt_inet6.h"
#endif
#endif
#endif /* __FreeBSD__ || __NetBSD__ */
#ifdef ALTQ_CBQ	/* cbq is enabled by ALTQ_CBQ option in opt_altq.h */

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>

#include <net/if.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>

#include <altq/altq.h>
#include <altq/altq_rmclass.h>
#include <altq/altq_rmclass_debug.h>
#include <altq/altq_red.h>
#include <altq/altq_rio.h>

/*
 * Local Macros
 */

#define	reset_cutoff(ifd)	{ ifd->cutoff_ = RM_MAXDEPTH; }

/*
 * Local routines.
 */

static int	rmc_satisfied __P((struct rm_class *, struct timeval *));
static void	rmc_wrr_set_weights __P((struct rm_ifdat *));
static void	rmc_depth_compute __P((struct rm_class *));
static void	rmc_depth_recompute __P((rm_class_t *));

static mbuf_t	*_rmc_wrr_dequeue_next __P((struct rm_ifdat *, int));
static mbuf_t	*_rmc_prr_dequeue_next __P((struct rm_ifdat *, int));

static int	_rmc_addq __P((rm_class_t *, mbuf_t *));
static void	_rmc_dropq __P((rm_class_t *));
static mbuf_t	*_rmc_getq __P((rm_class_t *));
static mbuf_t	*_rmc_pollq __P((rm_class_t *));

static int	rmc_under_limit __P((struct rm_class *, struct timeval *));
static void	rmc_tl_satisfied __P((struct rm_ifdat *, struct timeval *));
static void	rmc_drop_action __P((struct rm_class *));
static void	rmc_restart __P((struct rm_class *));
static void	rmc_root_overlimit __P((struct rm_class *, struct rm_class *));

#define BORROW_OFFTIME
/*
 * BORROW_OFFTIME (experimental):
 * borrow the offtime of the class borrowing from.
 * the reason is that when its own offtime is set, the class is unable
 * to borrow much, especially when cutoff is taking effect.
 * but when the borrowed class is overloaded (avgidle is close to minidle),
 * use the borrowing class's offtime to avoid overload.
 */
#define ADJUST_CUTOFF
/*
 * ADJUST_CUTOFF (experimental):
 * if no underlimit class is found due to cutoff, increase cutoff and
 * retry the scheduling loop.
 * also, don't invoke delay_actions while cutoff is taking effect,
 * since a sleeping class won't have a chance to be scheduled in the
 * next loop.
 *
 * the heuristics for setting the top-level variable (cutoff_) become:
 * 1. if a packet arrives for a not-overlimit class, set cutoff
 *    to the depth of the class.
 * 2. if cutoff is i, and a packet arrives for an overlimit class
 *    with an underlimit ancestor at a lower level than i (say j),
 *    then set cutoff to j.
 * 3. at scheduling a packet, if there is no underlimit class
 *    due to the current cutoff level, increase cutoff by 1 and
 *    then try to schedule again.
 */
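/*
 * Illustration of the heuristics above (a hypothetical tree, not taken
 * from the original sources): consider a three-level hierarchy with the
 * root at depth 2, agency classes at depth 1 and leaf classes at depth 0.
 * While cutoff_ == 0 (rule 1 fired for an underlimit leaf), an overlimit
 * leaf cannot borrow at all, because every borrowable ancestor sits at
 * depth >= 1 > cutoff_.  Only when the scheduler finds no underlimit
 * class does rule 3 raise cutoff_ back toward the root, one level per
 * retry of the scheduling loop.
 */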

/*
 * rm_class_t *
 * rmc_newclass(...) - Create a new resource management class at priority
 * 'pri' on the interface given by 'ifd'.
 *
 * nsecPerByte  is the data rate of the interface in nanoseconds/byte.
 *              E.g., 800 for a 10Mb/s ethernet.  If the class gets less
 *              than 100% of the bandwidth, this number should be the
 *              'effective' rate for the class.  Let f be the
 *              bandwidth fraction allocated to this class, and let
 *              nsPerByte be the data rate of the output link in
 *              nanoseconds/byte.  Then nsecPerByte is set to
 *              nsPerByte / f.  E.g., 1600 (= 800 / .5)
 *              for a class that gets 50% of an ethernet's bandwidth.
 *
 * action       the routine to call when the class is over limit.
 *
 * maxq         max allowable queue size for class (in packets).
 *
 * parent       parent class pointer.
 *
 * borrow       class to borrow from (should be either 'parent' or null).
 *
 * maxidle      max value allowed for class 'idle' time estimate (this
 *              parameter determines how large an initial burst of packets
 *              can be before overlimit action is invoked).
 *
 * offtime      how long 'delay' action will delay when class goes over
 *              limit (this parameter determines the steady-state burst
 *              size when a class is running over its limit).
 *
 * Maxidle and offtime have to be computed from the following:  If the
 * average packet size is s, the bandwidth fraction allocated to this
 * class is f, we want to allow b packet bursts, and the gain of the
 * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then:
 *
 *   ptime = s * nsPerByte * (1 - f) / f
 *   maxidle = ptime * (1 - g^b) / g^b
 *   minidle = -ptime * (1 / (f - 1))
 *   offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1))
 *
 * Operationally, it's convenient to specify maxidle & offtime in units
 * independent of the link bandwidth so the maxidle & offtime passed to
 * this routine are the above values multiplied by 8*f/(1000*nsPerByte).
 * (The constant factor is a scale factor needed to make the parameters
 * integers.  This scaling also means that the 'unscaled' values of
 * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds,
 * not nanoseconds.)  Also note that the 'idle' filter computation keeps
 * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of
 * maxidle also must be scaled upward by this value.  Thus, the passed
 * values for maxidle and offtime can be computed as follows:
 *
 * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte)
 * offtime = offtime * 8 / (1000 * nsecPerByte)
 *
 * When USE_HRTIME is employed, then maxidle and offtime become:
 *	maxidle = maxidle * (8.0 / nsecPerByte);
 *	offtime = offtime * (8.0 / nsecPerByte);
 */
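/*
 * Worked example of the scaling (hypothetical numbers): suppose the
 * filter equations above yield maxidle = 2 ms = 2,000,000 ns for a class
 * with nsecPerByte = 1600.  Assuming RM_FILTER_GAIN is 5 (its value in
 * altq_rmclass.h), the value passed to rmc_newclass() is
 *
 *	2000000 * 2^5 * 8 / (1000 * 1600) = 320
 *
 * and the code below undoes the link-rate factor:
 *
 *	cl->maxidle_ = (320 * 1600) / 8 = 64000
 *
 * i.e. 2000 us scaled up by 2^5, matching the units of the 'idle'
 * estimator.
 */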

struct rm_class *
rmc_newclass(pri, ifd, nsecPerByte, action, maxq, parent, borrow,
	     maxidle, minidle, offtime, pktsize, flags)
	int		pri;
	struct rm_ifdat	*ifd;
	u_int		nsecPerByte;
	void		(*action)(rm_class_t *, rm_class_t *);
	int		maxq;
	struct rm_class	*parent;
	struct rm_class	*borrow;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		pktsize;
	int		flags;
{
	struct rm_class	*cl;
	struct rm_class	*peer;
	int		 s;

	if (pri >= RM_MAXPRIO)
		return (NULL);
#ifndef ALTQ_RED
	if (flags & RMCF_RED) {
		printf("rmc_newclass: RED not configured for CBQ!\n");
		return (NULL);
	}
#endif
#ifndef ALTQ_RIO
	if (flags & RMCF_RIO) {
		printf("rmc_newclass: RIO not configured for CBQ!\n");
		return (NULL);
	}
#endif

	MALLOC(cl, struct rm_class *, sizeof(struct rm_class),
	       M_DEVBUF, M_WAITOK);
	if (cl == NULL)
		return (NULL);
	bzero(cl, sizeof(struct rm_class));
	CALLOUT_INIT(&cl->callout_);
	MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t),
	       M_DEVBUF, M_WAITOK);
	if (cl->q_ == NULL) {
		FREE(cl, M_DEVBUF);
		return (NULL);
	}
	bzero(cl->q_, sizeof(class_queue_t));

	/*
	 * Class initialization.
	 */
	cl->children_ = NULL;
	cl->parent_ = parent;
	cl->borrow_ = borrow;
	cl->leaf_ = 1;
	cl->ifdat_ = ifd;
	cl->pri_ = pri;
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->depth_ = 0;
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;
	qtype(cl->q_) = Q_DROPHEAD;
	qlen(cl->q_) = 0;
	cl->flags_ = flags;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * (int)nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif
	cl->overlimit = action;

#ifdef ALTQ_RED
	if (flags & (RMCF_RED|RMCF_RIO)) {
		int red_flags, red_pkttime;

		red_flags = 0;
		if (flags & RMCF_ECN)
			red_flags |= REDF_ECN;
		if (flags & RMCF_FLOWVALVE)
			red_flags |= REDF_FLOWVALVE;
#ifdef ALTQ_RIO
		if (flags & RMCF_CLEARDSCP)
			red_flags |= RIOF_CLEARDSCP;
#endif
		red_pkttime = nsecPerByte * pktsize / 1000;

		if (flags & RMCF_RED) {
			cl->red_ = red_alloc(0, 0, 0, 0,
					     red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RED;
		}
#ifdef ALTQ_RIO
		else {
			cl->red_ = (red_t *)rio_alloc(0, NULL,
						      red_flags, red_pkttime);
			if (cl->red_ != NULL)
				qtype(cl->q_) = Q_RIO;
		}
#endif
	}
#endif /* ALTQ_RED */

	/*
	 * put the class into the class tree
	 */
	s = splimp();
	if ((peer = ifd->active_[pri]) != NULL) {
		/* find the last class at this pri */
		cl->peer_ = peer;
		while (peer->peer_ != ifd->active_[pri])
			peer = peer->peer_;
		peer->peer_ = cl;
	} else {
		ifd->active_[pri] = cl;
		cl->peer_ = cl;
	}

	if (cl->parent_) {
		cl->next_ = parent->children_;
		parent->children_ = cl;
		parent->leaf_ = 0;
	}

	/*
	 * Compute the depth of this class and its ancestors in the class
	 * hierarchy.
	 */
	rmc_depth_compute(cl);

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->num_[pri]++;
		ifd->alloc_[pri] += cl->allotment_;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (cl);
}

int
rmc_modclass(cl, nsecPerByte, maxq, maxidle, minidle, offtime, pktsize)
	struct rm_class	*cl;
	u_int		nsecPerByte;
	int		maxq;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		pktsize;
{
	struct rm_ifdat	*ifd;
	u_int		 old_allotment;
	int		 s;

	ifd = cl->ifdat_;
	old_allotment = cl->allotment_;

	s = splimp();
	cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */
	cl->qthresh_ = 0;
	cl->ns_per_byte_ = nsecPerByte;

	qlimit(cl->q_) = maxq;

#if 1 /* minidle is also scaled in ALTQ */
	cl->minidle_ = (minidle * nsecPerByte) / 8;
	if (cl->minidle_ > 0)
		cl->minidle_ = 0;
#else
	cl->minidle_ = minidle;
#endif
	cl->maxidle_ = (maxidle * nsecPerByte) / 8;
	if (cl->maxidle_ == 0)
		cl->maxidle_ = 1;
#if 1 /* offtime is also scaled in ALTQ */
	cl->avgidle_ = cl->maxidle_;
	cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN;
	if (cl->offtime_ == 0)
		cl->offtime_ = 1;
#else
	cl->avgidle_ = 0;
	cl->offtime_ = (offtime * nsecPerByte) / 8;
#endif

	/*
	 * If CBQ's WRR is enabled, then initialize the class WRR state.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment;
		rmc_wrr_set_weights(ifd);
	}
	splx(s);
	return (0);
}
404

/*
 * static void
 * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes
 *	the appropriate round-robin weights for the CBQ weighted
 *	round-robin algorithm.
 *
 *	Returns:	NONE
 */
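/*
 * Worked example (hypothetical numbers): two classes at the same
 * priority i with allotments of 500,000 and 250,000 bytes/s on an
 * interface with maxpkt_ = 1500 give
 *
 *	M_[i] = (500000 + 250000) / (2 * 1500) = 250
 *	w_allotment_ = 500000 / 250 = 2000   (resp. 250000 / 250 = 1000)
 *
 * so in _rmc_wrr_dequeue_next() the first class can send roughly twice
 * the bytes of the second before its per-round allocation runs out.
 */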

static void
rmc_wrr_set_weights(ifd)
	struct rm_ifdat	*ifd;
{
	int		 i;
	struct rm_class	*cl, *clh;

	for (i = 0; i < RM_MAXPRIO; i++) {
		/*
		 * This is inverted from that of the simulator to
		 * maintain precision.
		 */
		if (ifd->num_[i] == 0)
			ifd->M_[i] = 0;
		else
			ifd->M_[i] = ifd->alloc_[i] /
				(ifd->num_[i] * ifd->maxpkt_);
		/*
		 * Compute the weighted allotment for each class.
		 * This takes the expensive div instruction out
		 * of the main loop for the wrr scheduling path.
		 * These only get recomputed when a class comes or
		 * goes.
		 */
		if (ifd->active_[i] != NULL) {
			clh = cl = ifd->active_[i];
			do {
				/* safe-guard for slow link or alloc_ == 0 */
				if (ifd->M_[i] == 0)
					cl->w_allotment_ = 0;
				else
					cl->w_allotment_ = cl->allotment_ /
						ifd->M_[i];
				cl = cl->peer_;
			} while ((cl != NULL) && (cl != clh));
		}
	}
}

int
rmc_get_weight(ifd, pri)
	struct rm_ifdat	*ifd;
	int		pri;
{
	if ((pri >= 0) && (pri < RM_MAXPRIO))
		return (ifd->M_[pri]);
	else
		return (0);
}

/*
 * static void
 * rmc_depth_compute(struct rm_class *cl) - This function computes the
 *	appropriate depth of class 'cl' and its ancestors.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_compute(cl)
	struct rm_class	*cl;
{
	rm_class_t	*t = cl, *p;

	/*
	 * Recompute the depth for the branch of the tree.
	 */
	while (t != NULL) {
		p = t->parent_;
		if (p && (t->depth_ >= p->depth_)) {
			p->depth_ = t->depth_ + 1;
			t = p;
		} else
			t = NULL;
	}
}

/*
 * static void
 * rmc_depth_recompute(struct rm_class *cl) - This function re-computes
 *	the depth of the tree after a class has been deleted.
 *
 *	Returns:	NONE
 */

static void
rmc_depth_recompute(rm_class_t *cl)
{
#if 1 /* ALTQ */
	rm_class_t	*p, *t;

	p = cl;
	while (p != NULL) {
		if ((t = p->children_) == NULL) {
			p->depth_ = 0;
		} else {
			int cdepth = 0;

			while (t != NULL) {
				if (t->depth_ > cdepth)
					cdepth = t->depth_;
				t = t->next_;
			}

			if (p->depth_ == cdepth + 1)
				/* no change to this parent */
				return;

			p->depth_ = cdepth + 1;
		}

		p = p->parent_;
	}
#else
	rm_class_t	*t;

	if (cl->depth_ >= 1) {
		if (cl->children_ == NULL) {
			cl->depth_ = 0;
		} else if ((t = cl->children_) != NULL) {
			while (t != NULL) {
				if (t->children_ != NULL)
					rmc_depth_recompute(t);
				t = t->next_;
			}
		} else
			rmc_depth_compute(cl);
	}
#endif
}

/*
 * void
 * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This
 *	function deletes a class from the link-sharing structure and frees
 *	all resources associated with the class.
 *
 *	Returns:	NONE
 */

void
rmc_delete_class(ifd, cl)
	struct rm_ifdat	*ifd;
	struct rm_class	*cl;
{
	struct rm_class	*p, *head, *previous;
	int		 s;

	ASSERT(cl->children_ == NULL);

	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);

	s = splimp();
	/*
	 * Free packets in the packet queue.
	 * XXX - this may not be a desired behavior.  Packets should be
	 *	 re-queued.
	 */
	rmc_dropall(cl);

	/*
	 * If the class has a parent, then remove the class from the
	 * parent's children chain.
	 */
	if (cl->parent_ != NULL) {
		head = cl->parent_->children_;
		p = previous = head;
		if (head->next_ == NULL) {
			ASSERT(head == cl);
			cl->parent_->children_ = NULL;
			cl->parent_->leaf_ = 1;
		} else while (p != NULL) {
			if (p == cl) {
				if (cl == head)
					cl->parent_->children_ = cl->next_;
				else
					previous->next_ = cl->next_;
				cl->next_ = NULL;
				p = NULL;
			} else {
				previous = p;
				p = p->next_;
			}
		}
	}

	/*
	 * Delete class from class priority peer list.
	 */
	if ((p = ifd->active_[cl->pri_]) != NULL) {
		/*
		 * If there is more than one member of this priority
		 * level, then look for class(cl) in the priority level.
		 */
		if (p != p->peer_) {
			while (p->peer_ != cl)
				p = p->peer_;
			p->peer_ = cl->peer_;

			if (ifd->active_[cl->pri_] == cl)
				ifd->active_[cl->pri_] = cl->peer_;
		} else {
			ASSERT(p == cl);
			ifd->active_[cl->pri_] = NULL;
		}
	}

	/*
	 * Recompute the WRR weights.
	 */
	if (ifd->wrr_) {
		ifd->alloc_[cl->pri_] -= cl->allotment_;
		ifd->num_[cl->pri_]--;
		rmc_wrr_set_weights(ifd);
	}

	/*
	 * Re-compute the depth of the tree.
	 */
#if 1 /* ALTQ */
	rmc_depth_recompute(cl->parent_);
#else
	rmc_depth_recompute(ifd->root_);
#endif

	splx(s);

	/*
	 * Free the class structure.
	 */
	if (cl->red_ != NULL) {
#ifdef ALTQ_RIO
		if (q_is_rio(cl->q_))
			rio_destroy((rio_t *)cl->red_);
#endif
#ifdef ALTQ_RED
		if (q_is_red(cl->q_))
			red_destroy(cl->red_);
#endif
	}
	FREE(cl->q_, M_DEVBUF);
	FREE(cl, M_DEVBUF);
}

/*
 * void
 * rmc_init(...) - Initialize the resource management data structures
 *	associated with the output portion of interface 'ifp'.  'ifd' is
 *	where the structures will be built (for backwards compatibility, the
 *	structures aren't kept in the ifnet struct).  'nsecPerByte'
 *	gives the link speed (inverse of bandwidth) in nanoseconds/byte.
 *	'restart' is the driver-specific routine that the generic 'delay
 *	until under limit' action will call to restart output.  `maxq'
 *	is the queue size of the 'link' & 'default' classes.  'maxqueued'
 *	is the maximum number of packets that the resource management
 *	code will allow to be queued 'downstream' (this is typically 1).
 *
 *	Returns:	NONE
 */

void
rmc_init(ifq, ifd, nsecPerByte, restart, maxq, maxqueued, maxidle,
	 minidle, offtime, flags)
	struct ifaltq	*ifq;
	struct rm_ifdat	*ifd;
	u_int		nsecPerByte;
	void		(*restart)(struct ifaltq *);
	int		maxq, maxqueued;
	u_int		maxidle;
	int		minidle;
	u_int		offtime;
	int		flags;
{
	int		i, mtu;

	/*
	 * Initialize the CBQ tracing/debug facility.
	 */
	CBQTRACEINIT();

	bzero((char *)ifd, sizeof (*ifd));
	mtu = ifq->altq_ifp->if_mtu;
	ifd->ifq_ = ifq;
	ifd->restart = restart;
	ifd->maxqueued_ = maxqueued;
	ifd->ns_per_byte_ = nsecPerByte;
	ifd->maxpkt_ = mtu;
	ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0;
	ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0;
#if 1
	ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16;
	if (mtu * nsecPerByte > 10 * 1000000)
		ifd->maxiftime_ /= 4;
#endif

	reset_cutoff(ifd);
	CBQTRACE(rmc_init, 'INIT', ifd->cutoff_);

	/*
	 * Initialize the CBQ's WRR state.
	 */
	for (i = 0; i < RM_MAXPRIO; i++) {
		ifd->alloc_[i] = 0;
		ifd->M_[i] = 0;
		ifd->num_[i] = 0;
		ifd->na_[i] = 0;
		ifd->active_[i] = NULL;
	}

	/*
	 * Initialize current packet state.
	 */
	ifd->qi_ = 0;
	ifd->qo_ = 0;
	for (i = 0; i < RM_MAXQUEUED; i++) {
		ifd->class_[i] = NULL;
		ifd->curlen_[i] = 0;
		ifd->borrowed_[i] = NULL;
	}

	/*
	 * Create the root class of the link-sharing structure.
	 */
	if ((ifd->root_ = rmc_newclass(0, ifd,
				       nsecPerByte,
				       rmc_root_overlimit, maxq, 0, 0,
				       maxidle, minidle, offtime,
				       0, 0)) == NULL) {
		printf("rmc_init: root class not allocated\n");
		return;
	}
	ifd->root_->depth_ = 0;
}

/*
 * void
 * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by
 *	mbuf 'm' to queue for resource class 'cl'.  This routine is called
 *	by a driver's if_output routine.  This routine must be called with
 *	output packet completion interrupts locked out (to avoid racing with
 *	rmc_dequeue_next).
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(cl, m)
	struct rm_class	*cl;
	mbuf_t		*m;
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob',
						 ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	classes to see if they are satisfied.
 */

static void
rmc_tl_satisfied(ifd, now)
	struct rm_ifdat	*ifd;
	struct timeval	*now;
{
	int		i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

static int
rmc_satisfied(cl, now)
	struct rm_class	*cl;
	struct timeval	*now;
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(cl, now)
	struct rm_class	*cl;
	struct timeval	*now;
{
	rm_class_t	*p = cl;
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is the scheduler for WRR as opposed
 * to packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(ifd, op)
	struct rm_ifdat	*ifd;
	int		op;
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_]" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(ifd, op)
	struct rm_ifdat	*ifd;
	int		op;
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	cl = first;
	cpri = cl->pri_;
#if 0 /* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the bookkeeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(ifd, mode)
	struct rm_ifdat	*ifd;
	int		mode;
{
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}
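
/*
 * Sketch of the poll/remove contract from a driver's point of view.
 * (Hypothetical driver code, not taken from a real driver; 'sc',
 * driver_has_room() and driver_start_xmit() are made-up names.)
 * The driver first peeks, then commits:
 *
 *	m = rmc_dequeue_next(ifd, ALTDQ_POLL);
 *	if (m != NULL && driver_has_room(sc, m_pktlen(m))) {
 *		m = rmc_dequeue_next(ifd, ALTDQ_REMOVE);
 *		driver_start_xmit(sc, m);
 *	}
 *
 * and calls rmc_update_class_util(ifd) from its transmit-completion
 * path.  The pollcache_ logic above guarantees that the ALTDQ_REMOVE
 * call returns the same mbuf that the preceding ALTDQ_POLL returned.
 */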

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
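/*
 * (Checking the arithmetic: 1/1024 + 1/65536 + 1/131072 = 0.00099945...,
 * about 0.055% below 1/1000, and since the shifts truncate, the
 * approximation never overshoots the exact quotient.)
 */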
void
rmc_update_class_util(ifd)
	struct rm_ifdat	*ifd;
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a transmit-complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1 /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1 /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;
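
		/*
		 * The update above is the CBQ exponentially weighted
		 * moving average
		 *	avgidle <- (1 - 1/2^RM_FILTER_GAIN) * avgidle
		 *		   + (1/2^RM_FILTER_GAIN) * idle
		 * applied to an estimate kept scaled up by
		 * 2^RM_FILTER_GAIN (see the scaling notes above
		 * rmc_newclass()), which is why the new sample is added
		 * at full weight while only avgidle >> RM_FILTER_GAIN
		 * is subtracted.
		 */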

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo',
				 cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to be set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
				 borrowed->depth_);
		}
#else /* !ALTQ */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob',
				 borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send is over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns:	NONE
 */

static void
rmc_drop_action(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

void
rmc_dropall(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}
1481
#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto __P((struct timeval *));

static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is the result of a borrow class being overlimit,
 *	then delay for the offtime of the borrowing class that is overlimit.
 *
 *	Returns:	NONE
 */

void
rmc_delay_action(cl, borrow)
	struct rm_class	*cl, *borrow;
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay,
				     &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has
 *	to know about system locking conventions.  Under BSD, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(cl)
	struct rm_class	*cl;
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

	s = splimp();
	if (cl->sleeping_) {
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link-sharing structure.
 *
 *	Returns:	NONE
 */

static void
rmc_root_overlimit(cl, borrow)
	struct rm_class	*cl, *borrow;
{
	panic("rmc_root_overlimit");
}
1624
/*
 * Packet Queue handling routines.  These exist to localize, in one
 * place, the effect on the rest of the code of whether the class
 * queues are RED queues or drop-tail queues.
 */

static int
_rmc_addq(cl, m)
	rm_class_t	*cl;
	mbuf_t		*m;
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_addq(cl->red_, cl->q_, m, cl->pktattr_);
#endif /* ALTQ_RED */

	if (cl->flags_ & RMCF_CLEARDSCP)
		write_dsfield(m, cl->pktattr_, 0);

	_addq(cl->q_, m);
	return (0);
}

/* note: _rmc_dropq is not called for red */
static void
_rmc_dropq(cl)
	rm_class_t	*cl;
{
	mbuf_t	*m;

	if ((m = _getq(cl->q_)) != NULL)
		m_freem(m);
}

static mbuf_t *
_rmc_getq(cl)
	rm_class_t	*cl;
{
#ifdef ALTQ_RIO
	if (q_is_rio(cl->q_))
		return rio_getq((rio_t *)cl->red_, cl->q_);
#endif
#ifdef ALTQ_RED
	if (q_is_red(cl->q_))
		return red_getq(cl->red_, cl->q_);
#endif
	return _getq(cl->q_);
}

static mbuf_t *
_rmc_pollq(cl)
	rm_class_t	*cl;
{
	return qhead(cl->q_);
}

#ifdef CBQ_TRACE

struct cbqtrace		 cbqtrace_buffer[NCBQTRACE+1];
struct cbqtrace		*cbqtrace_ptr = NULL;
int			 cbqtrace_count;

/*
 * DDB hook to trace cbq events:
 *  the last 1024 events are held in a circular buffer.
 *  use "call cbqtrace_dump(N)" to display 20 events from the Nth event.
 */
void cbqtrace_dump(int);
static char *rmc_funcname(void *);

static struct rmc_funcs {
	void	*func;
	char	*name;
} rmc_funcs[] =
{
	rmc_init,		"rmc_init",
	rmc_queue_packet,	"rmc_queue_packet",
	rmc_under_limit,	"rmc_under_limit",
	rmc_update_class_util,	"rmc_update_class_util",
	rmc_delay_action,	"rmc_delay_action",
	rmc_restart,		"rmc_restart",
	_rmc_wrr_dequeue_next,	"_rmc_wrr_dequeue_next",
	NULL,			NULL
};

static char *
rmc_funcname(func)
	void	*func;
{
	struct rmc_funcs *fp;

	for (fp = rmc_funcs; fp->func != NULL; fp++)
		if (fp->func == func)
			return (fp->name);
	return ("unknown");
}

void
cbqtrace_dump(counter)
	int	counter;
{
	int	 i, *p;
	char	*cp;

	counter = counter % NCBQTRACE;
	p = (int *)&cbqtrace_buffer[counter];

	for (i=0; i<20; i++) {
		printf("[0x%x] ", *p++);
		printf("%s: ", rmc_funcname((void *)*p++));
		cp = (char *)p++;
		printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]);
		printf("%d\n",*p++);

		if (p >= (int *)&cbqtrace_buffer[NCBQTRACE])
			p = (int *)cbqtrace_buffer;
	}
}
#endif /* CBQ_TRACE */

#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || \
    defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

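/*
 * The generic queue routines below keep each class queue as a circular
 * singly-linked list of mbufs: qtail(q) points at the last packet, and
 * that packet's m_nextpkt points back at the head, so both ends of the
 * queue are reachable from the single tail pointer.
 */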
void
_addq(q, m)
	class_queue_t	*q;
	mbuf_t		*m;
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;
	else
		m0 = m;
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

mbuf_t *
_getq(q)
	class_queue_t	*q;
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	}
	qlen(q)--;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(q)
	class_queue_t	*q;
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else
		qtail(q) = prev;
	qlen(q)--;
	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(q)
	class_queue_t	*q;
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;
	} else {
		struct mbuf *prev = NULL;

		n = random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;
	}
	qlen(q)--;
	return (m);
}

void
_removeq(q, m)
	class_queue_t	*q;
	mbuf_t		*m;
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;
	else if (qtail(q) == m)
		qtail(q) = prev;
	qlen(q)--;
}

void
_flushq(q)
	class_queue_t	*q;
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO || ALTQ_HFSC || ALTQ_PRIQ */