altq_wfq.c revision 1.9.12.3 1 /* $NetBSD: altq_wfq.c,v 1.9.12.3 2006/09/25 03:56:59 peter Exp $ */
2 /* $KAME: altq_wfq.c,v 1.14 2005/04/13 03:44:25 suz Exp $ */
3
4 /*
5 * Copyright (C) 1997-2002
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29 /*
30 * March 27, 1997. Written by Hiroshi Kyusojin of Keio University
31 * (kyu (at) mt.cs.keio.ac.jp).
32 */
33
34 #include <sys/cdefs.h>
35 __KERNEL_RCSID(0, "$NetBSD: altq_wfq.c,v 1.9.12.3 2006/09/25 03:56:59 peter Exp $");
36
37 #ifdef _KERNEL_OPT
38 #include "opt_altq.h"
39 #include "opt_inet.h"
40 #endif
41
42 #ifdef ALTQ_WFQ
43
44 #include <sys/param.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/uio.h>
48 #include <sys/socket.h>
49 #include <sys/systm.h>
50 #include <sys/proc.h>
51 #include <sys/errno.h>
52 #include <sys/time.h>
53 #include <sys/kernel.h>
54 #include <sys/kauth.h>
55
56 #include <net/if.h>
57 #include <net/if_types.h>
58 #include <netinet/in.h>
59
60 #include <altq/altq.h>
61 #include <altq/altq_conf.h>
62 #include <altq/altq_wfq.h>
63
64 #ifdef ALTQ3_COMPAT
65 /*
66 #define WFQ_DEBUG
67 */
68
69 static int wfq_setenable(struct wfq_interface *, int);
70 static int wfq_ifattach(struct wfq_interface *);
71 static int wfq_ifdetach(struct wfq_interface *);
72 static int wfq_ifenqueue(struct ifaltq *, struct mbuf *,
73 struct altq_pktattr *);
74 static u_long wfq_hash(struct flowinfo *, int);
75 static inline u_long wfq_hashbydstaddr(struct flowinfo *, int);
76 static inline u_long wfq_hashbysrcport(struct flowinfo *, int);
77 static wfq *wfq_maxqueue(wfq_state_t *);
78 static struct mbuf *wfq_ifdequeue(struct ifaltq *, int);
79 static int wfq_getqid(struct wfq_getqid *);
80 static int wfq_setweight(struct wfq_setweight *);
81 static int wfq_getstats(struct wfq_getstats *);
82 static int wfq_config(struct wfq_conf *);
83 static int wfq_request(struct ifaltq *, int, void *);
84 static int wfq_flush(struct ifaltq *);
85 static void *wfq_classify(void *, struct mbuf *, int);
86
87 /* global value : pointer to wfq queue list */
88 static wfq_state_t *wfq_list = NULL;
89
90 static int
91 wfq_setenable(ifacep, flag)
92 struct wfq_interface *ifacep;
93 int flag;
94 {
95 wfq_state_t *wfqp;
96 int error = 0;
97
98 if ((wfqp = altq_lookup(ifacep->wfq_ifacename, ALTQT_WFQ)) == NULL)
99 return (EBADF);
100
101 switch(flag){
102 case ENABLE:
103 error = altq_enable(wfqp->ifq);
104 break;
105 case DISABLE:
106 error = altq_disable(wfqp->ifq);
107 break;
108 }
109 return error;
110 }
111
112
113 static int
114 wfq_ifattach(ifacep)
115 struct wfq_interface *ifacep;
116 {
117 int error = 0, i;
118 struct ifnet *ifp;
119 wfq_state_t *new_wfqp;
120 wfq *queue;
121
122 if ((ifp = ifunit(ifacep->wfq_ifacename)) == NULL) {
123 #ifdef WFQ_DEBUG
124 printf("wfq_ifattach()...no ifp found\n");
125 #endif
126 return (ENXIO);
127 }
128
129 if (!ALTQ_IS_READY(&ifp->if_snd)) {
130 #ifdef WFQ_DEBUG
131 printf("wfq_ifattach()...altq is not ready\n");
132 #endif
133 return (ENXIO);
134 }
135
136 /* allocate and initialize wfq_state_t */
137 new_wfqp = malloc(sizeof(wfq_state_t), M_DEVBUF, M_WAITOK|M_ZERO);
138 if (new_wfqp == NULL)
139 return (ENOMEM);
140
141 queue = malloc(sizeof(wfq) * DEFAULT_QSIZE, M_DEVBUF, M_WAITOK|M_ZERO);
142 if (queue == NULL) {
143 free(new_wfqp, M_DEVBUF);
144 return (ENOMEM);
145 }
146
147 /* keep the ifq */
148 new_wfqp->ifq = &ifp->if_snd;
149 new_wfqp->nums = DEFAULT_QSIZE;
150 new_wfqp->hwm = HWM;
151 new_wfqp->bytes = 0;
152 new_wfqp->rrp = NULL;
153 new_wfqp->queue = queue;
154 new_wfqp->hash_func = wfq_hashbydstaddr;
155 new_wfqp->fbmask = FIMB4_DADDR;
156
157 for (i = 0; i < new_wfqp->nums; i++, queue++) {
158 queue->next = queue->prev = NULL;
159 queue->head = queue->tail = NULL;
160 queue->bytes = queue->quota = 0;
161 queue->weight = 100;
162 }
163
164 /*
165 * set WFQ to this ifnet structure.
166 */
167 if ((error = altq_attach(&ifp->if_snd, ALTQT_WFQ, new_wfqp,
168 wfq_ifenqueue, wfq_ifdequeue, wfq_request,
169 new_wfqp, wfq_classify)) != 0) {
170 free(queue, M_DEVBUF);
171 free(new_wfqp, M_DEVBUF);
172 return (error);
173 }
174
175 new_wfqp->next = wfq_list;
176 wfq_list = new_wfqp;
177
178 return (error);
179 }
180
181
182 static int
183 wfq_ifdetach(ifacep)
184 struct wfq_interface *ifacep;
185 {
186 int error = 0;
187 wfq_state_t *wfqp;
188
189 if ((wfqp = altq_lookup(ifacep->wfq_ifacename, ALTQT_WFQ)) == NULL)
190 return (EBADF);
191
192 /* free queued mbuf */
193 wfq_flush(wfqp->ifq);
194
195 /* remove WFQ from the ifnet structure. */
196 (void)altq_disable(wfqp->ifq);
197 (void)altq_detach(wfqp->ifq);
198
199 /* remove from the wfqstate list */
200 if (wfq_list == wfqp)
201 wfq_list = wfqp->next;
202 else {
203 wfq_state_t *wp = wfq_list;
204 do {
205 if (wp->next == wfqp) {
206 wp->next = wfqp->next;
207 break;
208 }
209 } while ((wp = wp->next) != NULL);
210 }
211
212 /* deallocate wfq_state_t */
213 free(wfqp->queue, M_DEVBUF);
214 free(wfqp, M_DEVBUF);
215 return (error);
216 }
217
218 static int
219 wfq_request(ifq, req, arg)
220 struct ifaltq *ifq;
221 int req;
222 void *arg;
223 {
224 wfq_state_t *wfqp = (wfq_state_t *)ifq->altq_disc;
225
226 switch (req) {
227 case ALTRQ_PURGE:
228 wfq_flush(wfqp->ifq);
229 break;
230 }
231 return (0);
232 }
233
234
235 static int
236 wfq_flush(ifq)
237 struct ifaltq *ifq;
238 {
239 struct mbuf *mp;
240
241 while ((mp = wfq_ifdequeue(ifq, ALTDQ_REMOVE)) != NULL)
242 m_freem(mp);
243 if (ALTQ_IS_ENABLED(ifq))
244 ifq->ifq_len = 0;
245 return 0;
246 }
247
248 static void *
249 wfq_classify(clfier, m, af)
250 void *clfier;
251 struct mbuf *m;
252 int af;
253 {
254 wfq_state_t *wfqp = (wfq_state_t *)clfier;
255 struct flowinfo flow;
256
257 altq_extractflow(m, af, &flow, wfqp->fbmask);
258 return (&wfqp->queue[(*wfqp->hash_func)(&flow, wfqp->nums)]);
259 }
260
261 static int
262 wfq_ifenqueue(ifq, mp, pktattr)
263 struct ifaltq *ifq;
264 struct mbuf *mp;
265 struct altq_pktattr *pktattr;
266 {
267 wfq_state_t *wfqp;
268 wfq *queue;
269 int byte, error = 0;
270
271 wfqp = (wfq_state_t *)ifq->altq_disc;
272 mp->m_nextpkt = NULL;
273
274 /* grab a queue selected by classifier */
275 if (pktattr == NULL || (queue = pktattr->pattr_class) == NULL)
276 queue = &wfqp->queue[0];
277
278 if (queue->tail == NULL)
279 queue->head = mp;
280 else
281 queue->tail->m_nextpkt = mp;
282 queue->tail = mp;
283 byte = mp->m_pkthdr.len;
284 queue->bytes += byte;
285 wfqp->bytes += byte;
286 ifq->ifq_len++;
287
288 if (queue->next == NULL) {
289 /* this queue gets active. add the queue to the active list */
290 if (wfqp->rrp == NULL){
291 /* no queue in the active list */
292 queue->next = queue->prev = queue;
293 wfqp->rrp = queue;
294 WFQ_ADDQUOTA(queue);
295 } else {
296 /* insert the queue at the tail of the active list */
297 queue->prev = wfqp->rrp->prev;
298 wfqp->rrp->prev->next = queue;
299 wfqp->rrp->prev = queue;
300 queue->next = wfqp->rrp;
301 queue->quota = 0;
302 }
303 }
304
305 /* check overflow. if the total size exceeds the high water mark,
306 drop packets from the longest queue. */
307 while (wfqp->bytes > wfqp->hwm) {
308 wfq *drop_queue = wfq_maxqueue(wfqp);
309
310 /* drop the packet at the head. */
311 mp = drop_queue->head;
312 if ((drop_queue->head = mp->m_nextpkt) == NULL)
313 drop_queue->tail = NULL;
314 mp->m_nextpkt = NULL;
315 byte = mp->m_pkthdr.len;
316 drop_queue->bytes -= byte;
317 PKTCNTR_ADD(&drop_queue->drop_cnt, byte);
318 wfqp->bytes -= byte;
319 m_freem(mp);
320 ifq->ifq_len--;
321 if(drop_queue == queue)
322 /* the queue for this flow is selected to drop */
323 error = ENOBUFS;
324 }
325 return error;
326 }
327
328
329 static u_long wfq_hash(flow, n)
330 struct flowinfo *flow;
331 int n;
332 {
333 u_long val = 0;
334
335 if (flow != NULL) {
336 if (flow->fi_family == AF_INET) {
337 struct flowinfo_in *fp = (struct flowinfo_in *)flow;
338 u_long val2;
339
340 val = fp->fi_dst.s_addr ^ fp->fi_src.s_addr;
341 val = val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24);
342 val2 = fp->fi_dport ^ fp->fi_sport ^ fp->fi_proto;
343 val2 = val2 ^ (val2 >> 8);
344 val = val ^ val2;
345 }
346 #ifdef INET6
347 else if (flow->fi_family == AF_INET6) {
348 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
349
350 val = ntohl(fp6->fi6_flowlabel);
351 }
352 #endif
353 }
354
355 return (val % n);
356 }
357
358
359 static inline u_long wfq_hashbydstaddr(flow, n)
360 struct flowinfo *flow;
361 int n;
362 {
363 u_long val = 0;
364
365 if (flow != NULL) {
366 if (flow->fi_family == AF_INET) {
367 struct flowinfo_in *fp = (struct flowinfo_in *)flow;
368
369 val = fp->fi_dst.s_addr;
370 val = val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24);
371 }
372 #ifdef INET6
373 else if (flow->fi_family == AF_INET6) {
374 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
375
376 val = ntohl(fp6->fi6_flowlabel);
377 }
378 #endif
379 }
380
381 return (val % n);
382 }
383
384 static inline u_long wfq_hashbysrcport(flow, n)
385 struct flowinfo *flow;
386 int n;
387 {
388 u_long val = 0;
389
390 if (flow != NULL) {
391 if (flow->fi_family == AF_INET) {
392 struct flowinfo_in *fp = (struct flowinfo_in *)flow;
393
394 val = fp->fi_sport;
395 }
396 #ifdef INET6
397 else if (flow->fi_family == AF_INET6) {
398 struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)flow;
399
400 val = fp6->fi6_sport;
401 }
402 #endif
403 }
404 val = val ^ (val >> 8);
405
406 return (val % n);
407 }
408
409 static wfq *wfq_maxqueue(wfqp)
410 wfq_state_t *wfqp;
411 {
412 int byte, max_byte = 0;
413 wfq *queue, *max_queue = NULL;
414
415 if((queue = wfqp->rrp) == NULL)
416 /* never happens */
417 return NULL;
418 do{
419 if ((byte = queue->bytes * 100 / queue->weight) > max_byte) {
420 max_queue = queue;
421 max_byte = byte;
422 }
423 } while ((queue = queue->next) != wfqp->rrp);
424
425 return max_queue;
426 }
427
428
429 static struct mbuf *
430 wfq_ifdequeue(ifq, op)
431 struct ifaltq *ifq;
432 int op;
433 {
434 wfq_state_t *wfqp;
435 wfq *queue;
436 struct mbuf *mp;
437 int byte;
438
439 wfqp = (wfq_state_t *)ifq->altq_disc;
440
441 if ((wfqp->bytes == 0) || ((queue = wfqp->rrp) == NULL))
442 /* no packet in the queues */
443 return NULL;
444
445 while (1) {
446 if (queue->quota > 0) {
447 if (queue->bytes <= 0) {
448 /* this queue no longer has packet.
449 remove the queue from the active list. */
450 if (queue->next == queue){
451 /* no other active queue
452 -- this case never happens in
453 this algorithm. */
454 queue->next = queue->prev = NULL;
455 wfqp->rrp = NULL;
456 return NULL;
457 } else {
458 queue->prev->next = queue->next;
459 queue->next->prev = queue->prev;
460 /* the round-robin pointer points
461 to this queue, advance the rrp */
462 wfqp->rrp = queue->next;
463 queue->next = queue->prev = NULL;
464 queue = wfqp->rrp;
465 WFQ_ADDQUOTA(queue);
466 continue;
467 }
468 }
469
470 /* dequeue a packet from this queue */
471 mp = queue->head;
472 if (op == ALTDQ_REMOVE) {
473 if((queue->head = mp->m_nextpkt) == NULL)
474 queue->tail = NULL;
475 byte = mp->m_pkthdr.len;
476 mp->m_nextpkt = NULL;
477 queue->quota -= byte;
478 queue->bytes -= byte;
479 PKTCNTR_ADD(&queue->xmit_cnt, byte);
480 wfqp->bytes -= byte;
481 if (ALTQ_IS_ENABLED(ifq))
482 ifq->ifq_len--;
483 }
484 return mp;
485
486 /* if the queue gets empty by this dequeueing,
487 the queue will be removed from the active list
488 at the next round */
489 }
490
491 /* advance the round-robin pointer */
492 queue = wfqp->rrp = queue->next;
493 WFQ_ADDQUOTA(queue);
494 }
495 }
496
497 static int
498 wfq_getqid(gqidp)
499 struct wfq_getqid *gqidp;
500 {
501 wfq_state_t *wfqp;
502
503 if ((wfqp = altq_lookup(gqidp->iface.wfq_ifacename, ALTQT_WFQ))
504 == NULL)
505 return (EBADF);
506
507 gqidp->qid = (*wfqp->hash_func)(&gqidp->flow, wfqp->nums);
508 return 0;
509 }
510
511 static int
512 wfq_setweight(swp)
513 struct wfq_setweight *swp;
514 {
515 wfq_state_t *wfqp;
516 wfq *queue;
517 int old;
518
519 if (swp->weight < 0) {
520 printf("set weight in natural number\n");
521 return (EINVAL);
522 }
523
524 if ((wfqp = altq_lookup(swp->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
525 return (EBADF);
526
527 queue = &wfqp->queue[swp->qid];
528 old = queue->weight;
529 queue->weight = swp->weight;
530 swp->weight = old;
531 return 0;
532 }
533
534
535 static int
536 wfq_getstats(gsp)
537 struct wfq_getstats *gsp;
538 {
539 wfq_state_t *wfqp;
540 wfq *queue;
541 queue_stats *stats;
542
543 if ((wfqp = altq_lookup(gsp->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
544 return (EBADF);
545
546 if (gsp->qid >= wfqp->nums)
547 return (EINVAL);
548
549 queue = &wfqp->queue[gsp->qid];
550 stats = &gsp->stats;
551
552 stats->bytes = queue->bytes;
553 stats->weight = queue->weight;
554 stats->xmit_cnt = queue->xmit_cnt;
555 stats->drop_cnt = queue->drop_cnt;
556
557 return 0;
558 }
559
560
561 static int
562 wfq_config(cf)
563 struct wfq_conf *cf;
564 {
565 wfq_state_t *wfqp;
566 wfq *queue;
567 int i, error = 0;
568
569 if ((wfqp = altq_lookup(cf->iface.wfq_ifacename, ALTQT_WFQ)) == NULL)
570 return (EBADF);
571
572 if(cf->nqueues <= 0 || MAX_QSIZE < cf->nqueues)
573 cf->nqueues = DEFAULT_QSIZE;
574
575 if (cf->nqueues != wfqp->nums) {
576 /* free queued mbuf */
577 wfq_flush(wfqp->ifq);
578 free(wfqp->queue, M_DEVBUF);
579
580 queue = malloc(sizeof(wfq) * cf->nqueues, M_DEVBUF,
581 M_WAITOK|M_ZERO);
582 if (queue == NULL)
583 return (ENOMEM);
584
585 wfqp->nums = cf->nqueues;
586 wfqp->bytes = 0;
587 wfqp->rrp = NULL;
588 wfqp->queue = queue;
589 for (i = 0; i < wfqp->nums; i++, queue++) {
590 queue->next = queue->prev = NULL;
591 queue->head = queue->tail = NULL;
592 queue->bytes = queue->quota = 0;
593 queue->weight = 100;
594 }
595 }
596
597 if (cf->qlimit != 0)
598 wfqp->hwm = cf->qlimit;
599
600 switch (cf->hash_policy) {
601 case WFQ_HASH_DSTADDR:
602 wfqp->hash_func = wfq_hashbydstaddr;
603 wfqp->fbmask = FIMB4_DADDR;
604 #ifdef INET6
605 wfqp->fbmask |= FIMB6_FLABEL; /* use flowlabel for ipv6 */
606 #endif
607 break;
608 case WFQ_HASH_SRCPORT:
609 wfqp->hash_func = wfq_hashbysrcport;
610 wfqp->fbmask = FIMB4_SPORT;
611 #ifdef INET6
612 wfqp->fbmask |= FIMB6_SPORT;
613 #endif
614 break;
615 case WFQ_HASH_FULL:
616 wfqp->hash_func = wfq_hash;
617 wfqp->fbmask = FIMB4_ALL;
618 #ifdef INET6
619 wfqp->fbmask |= FIMB6_FLABEL; /* use flowlabel for ipv6 */
620 #endif
621 break;
622 default:
623 error = EINVAL;
624 break;
625 }
626 return error;
627 }
628
629 /*
630 * wfq device interface
631 */
632
633 altqdev_decl(wfq);
634
/*
 * Open entry point of the wfq device.  Nothing needs to be set up, so
 * every argument is ignored and the call always succeeds.
 */
int
wfqopen(dev_t dev, int flag, int fmt, struct lwp *l)
{
	return 0;
}
643
644 int
645 wfqclose(dev, flag, fmt, l)
646 dev_t dev;
647 int flag, fmt;
648 struct lwp *l;
649 {
650 struct ifnet *ifp;
651 struct wfq_interface iface;
652 wfq_state_t *wfqp;
653 int s;
654
655 s = splnet();
656 while ((wfqp = wfq_list) != NULL) {
657 ifp = wfqp->ifq->altq_ifp;
658 sprintf(iface.wfq_ifacename, "%s", ifp->if_xname);
659 wfq_ifdetach(&iface);
660 }
661 splx(s);
662 return 0;
663 }
664
665 int
666 wfqioctl(dev, cmd, addr, flag, l)
667 dev_t dev;
668 ioctlcmd_t cmd;
669 caddr_t addr;
670 int flag;
671 struct lwp *l;
672 {
673 int error = 0;
674 int s;
675
676 /* check cmd for superuser only */
677 switch (cmd) {
678 case WFQ_GET_QID:
679 case WFQ_GET_STATS:
680 break;
681 default:
682 #if (__FreeBSD_version > 400000)
683 if ((error = suser(p)) != 0)
684 #else
685 if ((error = kauth_authorize_generic(l->l_cred,
686 KAUTH_GENERIC_ISSUSER, &l->l_acflag)) != 0)
687 #endif
688 return (error);
689 break;
690 }
691
692 s = splnet();
693 switch (cmd) {
694
695 case WFQ_ENABLE:
696 error = wfq_setenable((struct wfq_interface *)addr, ENABLE);
697 break;
698
699 case WFQ_DISABLE:
700 error = wfq_setenable((struct wfq_interface *)addr, DISABLE);
701 break;
702
703 case WFQ_IF_ATTACH:
704 error = wfq_ifattach((struct wfq_interface *)addr);
705 break;
706
707 case WFQ_IF_DETACH:
708 error = wfq_ifdetach((struct wfq_interface *)addr);
709 break;
710
711 case WFQ_GET_QID:
712 error = wfq_getqid((struct wfq_getqid *)addr);
713 break;
714
715 case WFQ_SET_WEIGHT:
716 error = wfq_setweight((struct wfq_setweight *)addr);
717 break;
718
719 case WFQ_GET_STATS:
720 error = wfq_getstats((struct wfq_getstats *)addr);
721 break;
722
723 case WFQ_CONFIG:
724 error = wfq_config((struct wfq_conf *)addr);
725 break;
726
727 default:
728 error = EINVAL;
729 break;
730 }
731 splx(s);
732 return error;
733 }
734
735 #ifdef KLD_MODULE
736
737 static struct altqsw wfq_sw =
738 {"wfq", wfqopen, wfqclose, wfqioctl};
739
740 ALTQ_MODULE(altq_wfq, ALTQT_WFQ, &wfq_sw);
741
742 #endif /* KLD_MODULE */
743
744 #endif /* ALTQ3_COMPAT */
745 #endif /* ALTQ_WFQ */
746