/*	$NetBSD: altq_subr.c,v 1.17 2006/10/13 09:57:28 peter Exp $	*/
/*	$KAME: altq_subr.c,v 1.24 2005/04/13 03:44:25 suz Exp $	*/

/*
 * Copyright (C) 1997-2003
 *	Sony Computer Science Laboratories Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: altq_subr.c,v 1.17 2006/10/13 09:57:28 peter Exp $");

#ifdef _KERNEL_OPT
#include "opt_altq.h"
#include "opt_inet.h"
#include "pf.h"
#endif

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/queue.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#ifdef INET6
#include <netinet/ip6.h>
#endif
#include <netinet/tcp.h>
#include <netinet/udp.h>

#if NPF > 0
#include <net/pfvar.h>
#endif
#include <altq/altq.h>
#ifdef ALTQ3_COMPAT
#include <altq/altq_conf.h>
#endif

/* machine dependent clock related includes */
#ifdef __FreeBSD__
#include "opt_cpu.h"	/* for FreeBSD-2.2.8 to get i586_ctr_freq */
#include <machine/clock.h>
#endif
#if defined(__i386__)
#include <machine/cpufunc.h>		/* for pentium tsc */
#include <machine/specialreg.h>		/* for CPUID_TSC */
#ifdef __FreeBSD__
#include <machine/md_var.h>		/* for cpu_feature */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
#include <machine/cpu.h>		/* for cpu_feature */
#endif
#endif /* __i386__ */

/*
 * internal function prototypes
 */
static void	tbr_timeout(void *);
int (*altq_input)(struct mbuf *, int) = NULL;
static int tbr_timer = 0;	/* token bucket regulator timer */
static struct callout tbr_callout = CALLOUT_INITIALIZER;

#if NPF > 0
int pfaltq_running;	/* keep track of running state */
#endif

#ifdef ALTQ3_CLFIER_COMPAT
static int	extract_ports4(struct mbuf *, struct ip *, struct flowinfo_in *);
#ifdef INET6
static int	extract_ports6(struct mbuf *, struct ip6_hdr *,
			       struct flowinfo_in6 *);
#endif
static int	apply_filter4(u_int32_t, struct flow_filter *,
			      struct flowinfo_in *);
static int	apply_ppfilter4(u_int32_t, struct flow_filter *,
				struct flowinfo_in *);
#ifdef INET6
static int	apply_filter6(u_int32_t, struct flow_filter6 *,
			      struct flowinfo_in6 *);
#endif
static int	apply_tosfilter4(u_int32_t, struct flow_filter *,
				 struct flowinfo_in *);
static u_long	get_filt_handle(struct acc_classifier *, int);
static struct acc_filter *filth_to_filtp(struct acc_classifier *, u_long);
static u_int32_t filt2fibmask(struct flow_filter *);

static void	ip4f_cache(struct ip *, struct flowinfo_in *);
static int	ip4f_lookup(struct ip *, struct flowinfo_in *);
static int	ip4f_init(void);
static struct ip4_frag	*ip4f_alloc(void);
static void	ip4f_free(struct ip4_frag *);
#endif /* ALTQ3_CLFIER_COMPAT */

/*
 * alternate queueing support routines
 */

/* look up the queue state by the interface name and the queueing type. */
void *
altq_lookup(char *name, int type)
{
	struct ifnet *ifp;

	if ((ifp = ifunit(name)) != NULL) {
		if (type != ALTQT_NONE && ifp->if_snd.altq_type == type)
			return (ifp->if_snd.altq_disc);
	}

	return NULL;
}

int
altq_attach(struct ifaltq *ifq, int type, void *discipline,
    int (*enqueue)(struct ifaltq *, struct mbuf *, struct altq_pktattr *),
    struct mbuf *(*dequeue)(struct ifaltq *, int),
    int (*request)(struct ifaltq *, int, void *),
    void *clfier, void *(*classify)(void *, struct mbuf *, int))
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;

#ifdef ALTQ3_COMPAT
	/*
	 * pfaltq can override the existing discipline, but altq3 cannot.
	 * check these if clfier is not NULL (which implies altq3).
	 */
	if (clfier != NULL) {
		if (ALTQ_IS_ENABLED(ifq))
			return EBUSY;
		if (ALTQ_IS_ATTACHED(ifq))
			return EEXIST;
	}
#endif
	ifq->altq_type     = type;
	ifq->altq_disc     = discipline;
	ifq->altq_enqueue  = enqueue;
	ifq->altq_dequeue  = dequeue;
	ifq->altq_request  = request;
	ifq->altq_clfier   = clfier;
	ifq->altq_classify = classify;
	ifq->altq_flags &= (ALTQF_CANTCHANGE|ALTQF_ENABLED);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_incref(type);
#endif
#endif
	return 0;
}

int
altq_detach(struct ifaltq *ifq)
{
	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return EBUSY;
	if (!ALTQ_IS_ATTACHED(ifq))
		return (0);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_KLD
	altq_module_declref(ifq->altq_type);
#endif
#endif

	ifq->altq_type     = ALTQT_NONE;
	ifq->altq_disc     = NULL;
	ifq->altq_enqueue  = NULL;
	ifq->altq_dequeue  = NULL;
	ifq->altq_request  = NULL;
	ifq->altq_clfier   = NULL;
	ifq->altq_classify = NULL;
	ifq->altq_flags &= ALTQF_CANTCHANGE;
	return 0;
}

int
altq_enable(struct ifaltq *ifq)
{
	int s;

	if (!ALTQ_IS_READY(ifq))
		return ENXIO;
	if (ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags |= ALTQF_ENABLED;
	if (ifq->altq_clfier != NULL)
		ifq->altq_flags |= ALTQF_CLASSIFY;
	splx(s);

	return 0;
}

int
altq_disable(struct ifaltq *ifq)
{
	int s;

	if (!ALTQ_IS_ENABLED(ifq))
		return 0;

	s = splnet();
	IFQ_PURGE(ifq);
	ASSERT(ifq->ifq_len == 0);
	ifq->altq_flags &= ~(ALTQF_ENABLED|ALTQF_CLASSIFY);
	splx(s);
	return 0;
}

#ifdef ALTQ_DEBUG
void
altq_assert(const char *file, int line, const char *failedexpr)
{
	(void)printf("altq assertion \"%s\" failed: file \"%s\", line %d\n",
		     failedexpr, file, line);
	panic("altq assertion");
	/* NOTREACHED */
}
#endif

/*
 * internal representation of token bucket parameters
 *	rate:	byte_per_unittime << 32
 *		(((bits_per_sec) / 8) << 32) / machclk_freq
 *	depth:	byte << 32
 *
 */
#define	TBR_SHIFT	32
#define	TBR_SCALE(x)	((int64_t)(x) << TBR_SHIFT)
#define	TBR_UNSCALE(x)	((x) >> TBR_SHIFT)
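
/*
 * illustrative example (hypothetical numbers): a 10Mbps profile on a
 * 1GHz machine clock gives
 *	tbr_rate = ((10000000 / 8) << 32) / 1000000000,
 * i.e. about 0.00125 bytes of credit per clock tick kept in 32-bit
 * fixed point; tbr_filluptime = tbr_depth / tbr_rate is then the
 * number of ticks needed to refill an empty bucket.
 */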

struct mbuf *
tbr_dequeue(struct ifaltq *ifq, int op)
{
	struct tb_regulator *tbr;
	struct mbuf *m;
	int64_t interval;
	u_int64_t now;

	tbr = ifq->altq_tbr;
	if (op == ALTDQ_REMOVE && tbr->tbr_lastop == ALTDQ_POLL) {
		/* if this is a remove after poll, bypass tbr check */
	} else {
		/* update token only when it is negative */
		if (tbr->tbr_token <= 0) {
			now = read_machclk();
			interval = now - tbr->tbr_last;
			if (interval >= tbr->tbr_filluptime)
				tbr->tbr_token = tbr->tbr_depth;
			else {
				tbr->tbr_token += interval * tbr->tbr_rate;
				if (tbr->tbr_token > tbr->tbr_depth)
					tbr->tbr_token = tbr->tbr_depth;
			}
			tbr->tbr_last = now;
		}
		/* if token is still negative, don't allow dequeue */
		if (tbr->tbr_token <= 0)
			return (NULL);
	}

	if (ALTQ_IS_ENABLED(ifq))
		m = (*ifq->altq_dequeue)(ifq, op);
	else {
		if (op == ALTDQ_POLL)
			IF_POLL(ifq, m);
		else
			IF_DEQUEUE(ifq, m);
	}

	if (m != NULL && op == ALTDQ_REMOVE)
		tbr->tbr_token -= TBR_SCALE(m_pktlen(m));
	tbr->tbr_lastop = op;
	return (m);
}

/*
 * set a token bucket regulator.
 * if the specified rate is zero, the token bucket regulator is deleted.
 */
int
tbr_set(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr, *otbr;

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0) {
		printf("tbr_set: no CPU clock available!\n");
		return (ENXIO);
	}

	if (profile->rate == 0) {
		/* delete this tbr */
		if ((tbr = ifq->altq_tbr) == NULL)
			return (ENOENT);
		ifq->altq_tbr = NULL;
		free(tbr, M_DEVBUF);
		return (0);
	}

	tbr = malloc(sizeof(struct tb_regulator), M_DEVBUF, M_WAITOK|M_ZERO);
	if (tbr == NULL)
		return (ENOMEM);

	tbr->tbr_rate = TBR_SCALE(profile->rate / 8) / machclk_freq;
	tbr->tbr_depth = TBR_SCALE(profile->depth);
	if (tbr->tbr_rate > 0)
		tbr->tbr_filluptime = tbr->tbr_depth / tbr->tbr_rate;
	else
		tbr->tbr_filluptime = 0xffffffffffffffffLL;
	tbr->tbr_token = tbr->tbr_depth;
	tbr->tbr_last = read_machclk();
	tbr->tbr_lastop = ALTDQ_REMOVE;

	otbr = ifq->altq_tbr;
	ifq->altq_tbr = tbr;	/* set the new tbr */

	if (otbr != NULL)
		free(otbr, M_DEVBUF);
	else {
		if (tbr_timer == 0) {
			CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
			tbr_timer = 1;
		}
	}
	return (0);
}

/*
 * tbr_timeout goes through the interface list, and kicks the drivers
 * if necessary.
 */
static void
tbr_timeout(void *arg __unused)
{
	struct ifnet *ifp;
	int active, s;

	active = 0;
	s = splnet();
	for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
		if (!TBR_IS_ENABLED(&ifp->if_snd))
			continue;
		active++;
		if (!IFQ_IS_EMPTY(&ifp->if_snd) && ifp->if_start != NULL)
			(*ifp->if_start)(ifp);
	}
	splx(s);
	if (active > 0)
		CALLOUT_RESET(&tbr_callout, 1, tbr_timeout, (void *)0);
	else
		tbr_timer = 0;	/* don't need tbr_timer anymore */
#if defined(__alpha__) && !defined(ALTQ_NOPCC)
	{
		/*
		 * XXX read out the machine dependent clock once a second
		 * to detect counter wrap-around.
		 */
		static u_int cnt;

		if (++cnt >= hz) {
			(void)read_machclk();
			cnt = 0;
		}
	}
#endif /* __alpha__ && !ALTQ_NOPCC */
}

/*
 * get token bucket regulator profile
 */
int
tbr_get(struct ifaltq *ifq, struct tb_profile *profile)
{
	struct tb_regulator *tbr;

	if ((tbr = ifq->altq_tbr) == NULL) {
		profile->rate = 0;
		profile->depth = 0;
	} else {
		profile->rate =
		    (u_int)TBR_UNSCALE(tbr->tbr_rate * 8 * machclk_freq);
		profile->depth = (u_int)TBR_UNSCALE(tbr->tbr_depth);
	}
	return (0);
}

#if NPF > 0
/*
 * attach a discipline to the interface.  if one already exists, it is
 * overridden.
 */
int
altq_pfattach(struct pf_altq *a)
{
	struct ifnet *ifp;
	struct tb_profile tb;
	int s, error = 0;

	switch (a->scheduler) {
	case ALTQT_NONE:
		break;
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_pfattach(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_pfattach(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_pfattach(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	ifp = ifunit(a->ifname);

	/* if the state is running, enable altq */
	if (error == 0 && pfaltq_running &&
	    ifp != NULL && ifp->if_snd.altq_type != ALTQT_NONE &&
	    !ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_enable(&ifp->if_snd);

	/* if altq is already enabled, reset the token bucket regulator */
	if (error == 0 && ifp != NULL && ALTQ_IS_ENABLED(&ifp->if_snd)) {
		tb.rate = a->ifbandwidth;
		tb.depth = a->tbrsize;
		s = splnet();
		error = tbr_set(&ifp->if_snd, &tb);
		splx(s);
	}

	return (error);
}

/*
 * detach a discipline from the interface.
 * it is possible that the discipline was already overridden by another
 * discipline.
 */
int
altq_pfdetach(struct pf_altq *a)
{
	struct ifnet *ifp;
	int s, error = 0;

	if ((ifp = ifunit(a->ifname)) == NULL)
		return (EINVAL);

	/* if this discipline is no longer referenced, just return */
	if (a->altq_disc == NULL || a->altq_disc != ifp->if_snd.altq_disc)
		return (0);

	s = splnet();
	if (ALTQ_IS_ENABLED(&ifp->if_snd))
		error = altq_disable(&ifp->if_snd);
	if (error == 0)
		error = altq_detach(&ifp->if_snd);
	splx(s);

	return (error);
}

/*
 * add a discipline or a queue
 */
int
altq_add(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_add_queue(a));

	if (machclk_freq == 0)
		init_machclk();
	if (machclk_freq == 0)
		panic("altq_add: no CPU clock");

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a discipline or a queue
 */
int
altq_remove(struct pf_altq *a)
{
	int error = 0;

	if (a->qname[0] != 0)
		return (altq_remove_queue(a));

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_altq(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_altq(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * add a queue to the discipline
 */
int
altq_add_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_add_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_add_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_add_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * remove a queue from the discipline
 */
int
altq_remove_queue(struct pf_altq *a)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_remove_queue(a);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_remove_queue(a);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}

/*
 * get queue statistics
 */
int
altq_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
{
	int error = 0;

	switch (a->scheduler) {
#ifdef ALTQ_CBQ
	case ALTQT_CBQ:
		error = cbq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_PRIQ
	case ALTQT_PRIQ:
		error = priq_getqstats(a, ubuf, nbytes);
		break;
#endif
#ifdef ALTQ_HFSC
	case ALTQT_HFSC:
		error = hfsc_getqstats(a, ubuf, nbytes);
		break;
#endif
	default:
		error = ENXIO;
	}

	return (error);
}
#endif /* NPF > 0 */

/*
 * read and write diffserv field in IPv4 or IPv6 header
 */
u_int8_t
read_dsfield(struct mbuf *m, struct altq_pktattr *pktattr)
{
	struct mbuf *m0;
	u_int8_t ds_field = 0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return ((u_int8_t)0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("read_dsfield: can't locate header!\n");
#endif
		return ((u_int8_t)0);
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;

		if (ip->ip_v != 4)
			return ((u_int8_t)0);	/* version mismatch! */
		ds_field = ip->ip_tos;
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return ((u_int8_t)0);	/* version mismatch! */
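		/*
		 * in host byte order, bits 31-28 of ip6_flow carry the
		 * version, bits 27-20 the traffic class, and bits 19-0
		 * the flow label.
		 */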
		ds_field = (flowlabel >> 20) & 0xff;
	}
#endif
	return (ds_field);
}

void
write_dsfield(struct mbuf *m, struct altq_pktattr *pktattr, u_int8_t dsfield)
{
	struct mbuf *m0;

	if (pktattr == NULL ||
	    (pktattr->pattr_af != AF_INET && pktattr->pattr_af != AF_INET6))
		return;

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if ((pktattr->pattr_hdr >= m0->m_data) &&
		    (pktattr->pattr_hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, pattr_hdr is stale */
		pktattr->pattr_af = AF_UNSPEC;
#ifdef ALTQ_DEBUG
		printf("write_dsfield: can't locate header!\n");
#endif
		return;
	}

	if (pktattr->pattr_af == AF_INET) {
		struct ip *ip = (struct ip *)pktattr->pattr_hdr;
		u_int8_t old;
		int32_t sum;

		if (ip->ip_v != 4)
			return;		/* version mismatch! */
		old = ip->ip_tos;
		dsfield |= old & 3;	/* leave CU bits */
		if (old == dsfield)
			return;
		ip->ip_tos = dsfield;
		/*
		 * update checksum (from RFC1624)
		 *	   HC' = ~(~HC + ~m + m')
		 */
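		/*
		 * the tos byte shares its 16-bit checksum word with the
		 * unchanged version/ihl byte, whose ~x + x terms sum to
		 * 0xff00.  e.g. (hypothetical values) changing tos from
		 * 0x00 to 0xb8 adds 0xff00 + 0xff + 0xb8 to the
		 * complemented sum before the carries are folded back in.
		 */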
		sum = ~ntohs(ip->ip_sum) & 0xffff;
		sum += 0xff00 + (~old & 0xff) + dsfield;
		sum = (sum >> 16) + (sum & 0xffff);
		sum += (sum >> 16);  /* add carry */

		ip->ip_sum = htons(~sum & 0xffff);
	}
#ifdef INET6
	else if (pktattr->pattr_af == AF_INET6) {
		struct ip6_hdr *ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		u_int32_t flowlabel;

		flowlabel = ntohl(ip6->ip6_flow);
		if ((flowlabel >> 28) != 6)
			return;		/* version mismatch! */
		flowlabel = (flowlabel & 0xf03fffff) | (dsfield << 20);
		ip6->ip6_flow = htonl(flowlabel);
	}
#endif
	return;
}


/*
 * high resolution clock support taking advantage of a machine dependent
 * high resolution time counter (e.g., timestamp counter of intel pentium).
 * we assume
 *  - 64-bit-long monotonically-increasing counter
 *  - frequency range is 100M-4GHz (CPU speed)
 */
/* if pcc is not available or disabled, emulate 256MHz using microtime() */
#define	MACHCLK_SHIFT	8
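/*
 * i.e., the 1000000 ticks/sec from microtime() scaled by 2^MACHCLK_SHIFT
 * yield an emulated 1000000 << 8 = 256MHz clock; read_machclk() below
 * applies the same shift to the microsecond count so the two stay
 * consistent.
 */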

int machclk_usepcc;
u_int32_t machclk_freq = 0;
u_int32_t machclk_per_tick = 0;

#ifdef __alpha__
#ifdef __FreeBSD__
extern u_int32_t cycles_per_sec;	/* alpha cpu clock frequency */
#elif defined(__NetBSD__) || defined(__OpenBSD__)
extern u_int64_t cycles_per_usec;	/* alpha cpu clock frequency */
#endif
#endif /* __alpha__ */

void
init_machclk(void)
{
	machclk_usepcc = 1;

#if (!defined(__i386__) && !defined(__alpha__)) || defined(ALTQ_NOPCC)
	machclk_usepcc = 0;
#endif
#if defined(__FreeBSD__) && defined(SMP)
	machclk_usepcc = 0;
#endif
#if defined(__NetBSD__) && defined(MULTIPROCESSOR)
	machclk_usepcc = 0;
#endif
#ifdef __i386__
	/* check if TSC is available */
	if (machclk_usepcc == 1 && (cpu_feature & CPUID_TSC) == 0)
		machclk_usepcc = 0;
#endif

	if (machclk_usepcc == 0) {
		/* emulate 256MHz using microtime() */
		machclk_freq = 1000000 << MACHCLK_SHIFT;
		machclk_per_tick = machclk_freq / hz;
#ifdef ALTQ_DEBUG
		printf("altq: emulate %uHz CPU clock\n", machclk_freq);
#endif
		return;
	}

	/*
	 * if the clock frequency (of Pentium TSC or Alpha PCC) is
	 * accessible, just use it.
	 */
#ifdef __i386__
#ifdef __FreeBSD__
#if (__FreeBSD_version > 300000)
	machclk_freq = tsc_freq;
#else
	machclk_freq = i586_ctr_freq;
#endif
#elif defined(__NetBSD__)
	machclk_freq = (u_int32_t)curcpu()->ci_tsc_freq;
#elif defined(__OpenBSD__) && (defined(I586_CPU) || defined(I686_CPU))
	machclk_freq = pentium_mhz * 1000000;
#endif
#elif defined(__alpha__)
#ifdef __FreeBSD__
	machclk_freq = cycles_per_sec;
#elif defined(__NetBSD__) || defined(__OpenBSD__)
	machclk_freq = (u_int32_t)(cycles_per_usec * 1000000);
#endif
#endif /* __alpha__ */

	/*
	 * if we don't know the clock frequency, measure it.
	 */
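	/*
	 * calibration sketch: sleep for roughly one second (tsleep may
	 * return early on a signal), then divide the elapsed machine
	 * clock count by the elapsed microtime() interval to estimate
	 * cycles per second.
	 */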
	if (machclk_freq == 0) {
		static int	wait;
		struct timeval	tv_start, tv_end;
		u_int64_t	start, end, diff;
		int		timo;

		microtime(&tv_start);
		start = read_machclk();
		timo = hz;	/* 1 sec */
		(void)tsleep(&wait, PWAIT | PCATCH, "init_machclk", timo);
		microtime(&tv_end);
		end = read_machclk();
		diff = (u_int64_t)(tv_end.tv_sec - tv_start.tv_sec) * 1000000
		    + tv_end.tv_usec - tv_start.tv_usec;
		if (diff != 0)
			machclk_freq = (u_int)((end - start) * 1000000 / diff);
	}

	machclk_per_tick = machclk_freq / hz;

#ifdef ALTQ_DEBUG
	printf("altq: CPU clock: %uHz\n", machclk_freq);
#endif
}

#if defined(__OpenBSD__) && defined(__i386__)
static inline u_int64_t
rdtsc(void)
{
	u_int64_t rv;
	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
	return (rv);
}
#endif /* __OpenBSD__ && __i386__ */

u_int64_t
read_machclk(void)
{
	u_int64_t val;

	if (machclk_usepcc) {
#if defined(__i386__)
		val = rdtsc();
#elif defined(__alpha__)
		static u_int32_t last_pcc, upper;
		u_int32_t pcc;

		/*
		 * for alpha, make a 64bit counter value out of the 32bit
		 * alpha processor cycle counter.
		 * read_machclk must be called within a half of its
		 * wrap-around cycle (about 5 sec for 400MHz cpu) to properly
		 * detect a counter wrap-around.
		 * tbr_timeout calls read_machclk once a second.
		 */
		pcc = (u_int32_t)alpha_rpcc();
		if (pcc <= last_pcc)
			upper++;
		last_pcc = pcc;
		val = ((u_int64_t)upper << 32) + pcc;
#else
		panic("read_machclk");
#endif
	} else {
		struct timeval tv;

		microtime(&tv);
		val = (((u_int64_t)(tv.tv_sec - boottime.tv_sec) * 1000000
		    + tv.tv_usec) << MACHCLK_SHIFT);
	}
	return (val);
}

#ifdef ALTQ3_CLFIER_COMPAT

#ifndef IPPROTO_ESP
#define	IPPROTO_ESP	50		/* encapsulating security payload */
#endif
#ifndef IPPROTO_AH
#define	IPPROTO_AH	51		/* authentication header */
#endif

/*
 * extract flow information from a given packet.
 * filt_mask shows flowinfo fields required.
 * we assume the ip header is in one mbuf, and addresses and ports are
 * in network byte order.
 */
int
altq_extractflow(struct mbuf *m, int af, struct flowinfo *flow,
    u_int32_t filt_bmask)
{

	switch (af) {
	case PF_INET: {
		struct flowinfo_in *fin;
		struct ip *ip;

		ip = mtod(m, struct ip *);

		if (ip->ip_v != 4)
			break;

		fin = (struct flowinfo_in *)flow;
		fin->fi_len = sizeof(struct flowinfo_in);
		fin->fi_family = AF_INET;

		fin->fi_proto = ip->ip_p;
		fin->fi_tos = ip->ip_tos;

		fin->fi_src.s_addr = ip->ip_src.s_addr;
		fin->fi_dst.s_addr = ip->ip_dst.s_addr;

		if (filt_bmask & FIMB4_PORTS)
			/* if port info is required, extract port numbers */
			extract_ports4(m, ip, fin);
		else {
			fin->fi_sport = 0;
			fin->fi_dport = 0;
			fin->fi_gpi = 0;
		}
		return (1);
	}

#ifdef INET6
	case PF_INET6: {
		struct flowinfo_in6 *fin6;
		struct ip6_hdr *ip6;

		ip6 = mtod(m, struct ip6_hdr *);
		/* should we check the ip version? */

		fin6 = (struct flowinfo_in6 *)flow;
		fin6->fi6_len = sizeof(struct flowinfo_in6);
		fin6->fi6_family = AF_INET6;

		fin6->fi6_proto = ip6->ip6_nxt;
		fin6->fi6_tclass = (ntohl(ip6->ip6_flow) >> 20) & 0xff;

		fin6->fi6_flowlabel = ip6->ip6_flow & htonl(0x000fffff);
		fin6->fi6_src = ip6->ip6_src;
		fin6->fi6_dst = ip6->ip6_dst;

		if ((filt_bmask & FIMB6_PORTS) ||
		    ((filt_bmask & FIMB6_PROTO)
		     && ip6->ip6_nxt > IPPROTO_IPV6))
			/*
			 * if port info is required, or proto is required
			 * but there are option headers, extract port
			 * and protocol numbers.
			 */
			extract_ports6(m, ip6, fin6);
		else {
			fin6->fi6_sport = 0;
			fin6->fi6_dport = 0;
			fin6->fi6_gpi = 0;
		}
		return (1);
	}
#endif /* INET6 */

	default:
		break;
	}

	/* failed */
	flow->fi_len = sizeof(struct flowinfo);
	flow->fi_family = AF_UNSPEC;
	return (0);
}

/*
 * helper routine to extract port numbers
 */
/* structure for ipsec and ipv6 option header template */
struct _opt6 {
	u_int8_t	opt6_nxt;	/* next header */
	u_int8_t	opt6_hlen;	/* header extension length */
	u_int16_t	_pad;
	u_int32_t	ah_spi;		/* security parameter index
					   for authentication header */
};
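
/*
 * note: the first 8 bytes of this template line up with the generic
 * IPv6 extension header layout (next header, length) and, for AH, with
 * the SPI that follows the 16-bit reserved field (RFC 2402); the same
 * template is reused below to step over hop-by-hop, routing and
 * destination option headers.
 */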

/*
 * extract port numbers from an ipv4 packet.
 */
static int
extract_ports4(struct mbuf *m, struct ip *ip, struct flowinfo_in *fin)
{
	struct mbuf *m0;
	u_short ip_off;
	u_int8_t proto;
	int	off;

	fin->fi_sport = 0;
	fin->fi_dport = 0;
	fin->fi_gpi = 0;

	ip_off = ntohs(ip->ip_off);
	/* if it is a fragment, try cached fragment info */
	if (ip_off & IP_OFFMASK) {
		ip4f_lookup(ip, fin);
		return (1);
	}

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip >= m0->m_data) &&
		    ((caddr_t)ip < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports4: can't locate header! ip=%p\n", ip);
#endif
		return (0);
	}
	off = ((caddr_t)ip - m0->m_data) + (ip->ip_hl << 2);
	proto = ip->ip_p;

#ifdef ALTQ_IPSEC
 again:
#endif
	while (off >= m0->m_len) {
		off -= m0->m_len;
		m0 = m0->m_next;
		if (m0 == NULL)
			return (0);  /* bogus ip_hl! */
	}
	if (m0->m_len < off + 4)
		return (0);

	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP: {
		struct udphdr *udp;

		udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
		fin->fi_sport = udp->uh_sport;
		fin->fi_dport = udp->uh_dport;
		fin->fi_proto = proto;
		}
		break;

#ifdef ALTQ_IPSEC
	case IPPROTO_ESP:
		if (fin->fi_gpi == 0) {
			u_int32_t *gpi;

			gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
			fin->fi_gpi   = *gpi;
		}
		fin->fi_proto = proto;
		break;

	case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			if (fin->fi_gpi == 0 && m0->m_len >= off + 8)
				fin->fi_gpi = opt6->ah_spi;
		}
		/* goto the next header */
		goto again;
#endif  /* ALTQ_IPSEC */

	default:
		fin->fi_proto = proto;
		return (0);
	}

	/* if this is a first fragment, cache it. */
	if (ip_off & IP_MF)
		ip4f_cache(ip, fin);

	return (1);
}

#ifdef INET6
static int
extract_ports6(struct mbuf *m, struct ip6_hdr *ip6, struct flowinfo_in6 *fin6)
{
	struct mbuf *m0;
	int	off;
	u_int8_t proto;

	fin6->fi6_gpi   = 0;
	fin6->fi6_sport = 0;
	fin6->fi6_dport = 0;

	/* locate the mbuf containing the protocol header */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)ip6 >= m0->m_data) &&
		    ((caddr_t)ip6 < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
#ifdef ALTQ_DEBUG
		printf("extract_ports6: can't locate header! ip6=%p\n", ip6);
#endif
		return (0);
	}
	off = ((caddr_t)ip6 - m0->m_data) + sizeof(struct ip6_hdr);

	proto = ip6->ip6_nxt;
	do {
		while (off >= m0->m_len) {
			off -= m0->m_len;
			m0 = m0->m_next;
			if (m0 == NULL)
				return (0);
		}
		if (m0->m_len < off + 4)
			return (0);

		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP: {
			struct udphdr *udp;

			udp = (struct udphdr *)(mtod(m0, caddr_t) + off);
			fin6->fi6_sport = udp->uh_sport;
			fin6->fi6_dport = udp->uh_dport;
			fin6->fi6_proto = proto;
			}
			return (1);

		case IPPROTO_ESP:
			if (fin6->fi6_gpi == 0) {
				u_int32_t *gpi;

				gpi = (u_int32_t *)(mtod(m0, caddr_t) + off);
				fin6->fi6_gpi   = *gpi;
			}
			fin6->fi6_proto = proto;
			return (1);

		case IPPROTO_AH: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			if (fin6->fi6_gpi == 0 && m0->m_len >= off + 8)
				fin6->fi6_gpi = opt6->ah_spi;
			proto = opt6->opt6_nxt;
			off += 8 + (opt6->opt6_hlen * 4);
			/* go to the next header */
			break;
			}

		case IPPROTO_HOPOPTS:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS: {
			/* get next header and header length */
			struct _opt6 *opt6;

			opt6 = (struct _opt6 *)(mtod(m0, caddr_t) + off);
			proto = opt6->opt6_nxt;
			off += (opt6->opt6_hlen + 1) * 8;
			/* go to the next header */
			break;
			}

		case IPPROTO_FRAGMENT:
			/* ipv6 fragmentations are not supported yet */
		default:
			fin6->fi6_proto = proto;
			return (0);
		}
	} while (1);
	/*NOTREACHED*/
}
#endif /* INET6 */

/*
 * altq common classifier
 */
int
acc_add_filter(struct acc_classifier *classifier, struct flow_filter *filter,
    void *class, u_long *phandle)
{
	struct acc_filter *afp, *prev, *tmp;
	int	i, s;

#ifdef INET6
	if (filter->ff_flow.fi_family != AF_INET &&
	    filter->ff_flow.fi_family != AF_INET6)
		return (EINVAL);
#else
	if (filter->ff_flow.fi_family != AF_INET)
		return (EINVAL);
#endif

	afp = malloc(sizeof(struct acc_filter), M_DEVBUF, M_WAITOK|M_ZERO);
	if (afp == NULL)
		return (ENOMEM);

	afp->f_filter = *filter;
	afp->f_class = class;

	i = ACC_WILDCARD_INDEX;
	if (filter->ff_flow.fi_family == AF_INET) {
		struct flow_filter *filter4 = &afp->f_filter;

		/*
		 * if address is 0, it's a wildcard.  if address mask
		 * isn't set, use full mask.
		 */
		if (filter4->ff_flow.fi_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0;
		else if (filter4->ff_mask.mask_dst.s_addr == 0)
			filter4->ff_mask.mask_dst.s_addr = 0xffffffff;
		if (filter4->ff_flow.fi_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0;
		else if (filter4->ff_mask.mask_src.s_addr == 0)
			filter4->ff_mask.mask_src.s_addr = 0xffffffff;

		/* clear extra bits in addresses */
		filter4->ff_flow.fi_dst.s_addr &=
		    filter4->ff_mask.mask_dst.s_addr;
		filter4->ff_flow.fi_src.s_addr &=
		    filter4->ff_mask.mask_src.s_addr;

		/*
		 * if dst address is a wildcard, use hash-entry
		 * ACC_WILDCARD_INDEX.
		 */
		if (filter4->ff_mask.mask_dst.s_addr != 0xffffffff)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter4->ff_flow.fi_dst.s_addr);
	}
#ifdef INET6
	else if (filter->ff_flow.fi_family == AF_INET6) {
		struct flow_filter6 *filter6 =
			(struct flow_filter6 *)&afp->f_filter;
#ifndef IN6MASK0 /* taken from kame ipv6 */
#define	IN6MASK0	{{{ 0, 0, 0, 0 }}}
#define	IN6MASK128	{{{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }}}
		const struct in6_addr in6mask0 = IN6MASK0;
		const struct in6_addr in6mask128 = IN6MASK128;
#endif

		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_dst))
			filter6->ff_mask6.mask6_dst = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_dst))
			filter6->ff_mask6.mask6_dst = in6mask128;
		if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_flow6.fi6_src))
			filter6->ff_mask6.mask6_src = in6mask0;
		else if (IN6_IS_ADDR_UNSPECIFIED(&filter6->ff_mask6.mask6_src))
			filter6->ff_mask6.mask6_src = in6mask128;

		/* clear extra bits in addresses */
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_dst.s6_addr[i] &=
			    filter6->ff_mask6.mask6_dst.s6_addr[i];
		for (i = 0; i < 16; i++)
			filter6->ff_flow6.fi6_src.s6_addr[i] &=
			    filter6->ff_mask6.mask6_src.s6_addr[i];

		if (filter6->ff_flow6.fi6_flowlabel == 0)
			i = ACC_WILDCARD_INDEX;
		else
			i = ACC_GET_HASH_INDEX(filter6->ff_flow6.fi6_flowlabel);
	}
#endif /* INET6 */

	afp->f_handle = get_filt_handle(classifier, i);

	/* update filter bitmask */
	afp->f_fbmask = filt2fibmask(filter);
	classifier->acc_fbmask |= afp->f_fbmask;

	/*
	 * add this filter to the filter list.
	 * filters are ordered from the highest rule number.
	 */
	s = splnet();
	prev = NULL;
	LIST_FOREACH(tmp, &classifier->acc_filters[i], f_chain) {
		if (tmp->f_filter.ff_ruleno > afp->f_filter.ff_ruleno)
			prev = tmp;
		else
			break;
	}
	if (prev == NULL)
		LIST_INSERT_HEAD(&classifier->acc_filters[i], afp, f_chain);
	else
		LIST_INSERT_AFTER(prev, afp, f_chain);
	splx(s);

	*phandle = afp->f_handle;
	return (0);
}

int
acc_delete_filter(struct acc_classifier *classifier, u_long handle)
{
	struct acc_filter *afp;
	int	s;

	if ((afp = filth_to_filtp(classifier, handle)) == NULL)
		return (EINVAL);

	s = splnet();
	LIST_REMOVE(afp, f_chain);
	splx(s);

	free(afp, M_DEVBUF);

	/* todo: update filt_bmask */

	return (0);
}

/*
 * delete filters referencing the specified class.
 * if the all flag is not 0, delete all the filters.
 */
int
acc_discard_filters(struct acc_classifier *classifier, void *class, int all)
{
	struct acc_filter *afp;
	int	i, s;

	s = splnet();
	for (i = 0; i < ACC_FILTER_TABLESIZE; i++) {
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (all || afp->f_class == class) {
					LIST_REMOVE(afp, f_chain);
					free(afp, M_DEVBUF);
					/* start again from the head */
					break;
				}
		} while (afp != NULL);
	}
	splx(s);

	if (all)
		classifier->acc_fbmask = 0;

	return (0);
}

void *
acc_classify(void *clfier, struct mbuf *m, int af)
{
	struct acc_classifier *classifier;
	struct flowinfo flow;
	struct acc_filter *afp;
	int	i;

	classifier = (struct acc_classifier *)clfier;
	altq_extractflow(m, af, &flow, classifier->acc_fbmask);

	if (flow.fi_family == AF_INET) {
		struct flowinfo_in *fp = (struct flowinfo_in *)&flow;

		if ((classifier->acc_fbmask & FIMB4_ALL) == FIMB4_TOS) {
			/* only tos is used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_tosfilter4(afp->f_fbmask,
						     &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else if ((classifier->acc_fbmask &
			(~(FIMB4_PROTO|FIMB4_SPORT|FIMB4_DPORT) & FIMB4_ALL))
		    == 0) {
			/* only proto and ports are used */
			LIST_FOREACH(afp,
				 &classifier->acc_filters[ACC_WILDCARD_INDEX],
				 f_chain)
				if (apply_ppfilter4(afp->f_fbmask,
						    &afp->f_filter, fp))
					/* filter matched */
					return (afp->f_class);
		} else {
			/* get the filter hash entry from its dest address */
			i = ACC_GET_HASH_INDEX(fp->fi_dst.s_addr);
			do {
				/*
				 * go through this loop twice.  first for dst
				 * hash, second for wildcards.
				 */
				LIST_FOREACH(afp, &classifier->acc_filters[i],
					     f_chain)
					if (apply_filter4(afp->f_fbmask,
							  &afp->f_filter, fp))
						/* filter matched */
						return (afp->f_class);

				/*
				 * check again for filters with a dst addr
				 * wildcard.
				 * (daddr == 0 || dmask != 0xffffffff).
				 */
				if (i != ACC_WILDCARD_INDEX)
					i = ACC_WILDCARD_INDEX;
				else
					break;
			} while (1);
		}
	}
#ifdef INET6
	else if (flow.fi_family == AF_INET6) {
		struct flowinfo_in6 *fp6 = (struct flowinfo_in6 *)&flow;

		/* get the filter hash entry from its flow ID */
		if (fp6->fi6_flowlabel != 0)
			i = ACC_GET_HASH_INDEX(fp6->fi6_flowlabel);
		else
			/* flowlabel can be zero */
			i = ACC_WILDCARD_INDEX;

		/* go through this loop twice.  first for flow hash, second
		   for wildcards. */
		do {
			LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
				if (apply_filter6(afp->f_fbmask,
					(struct flow_filter6 *)&afp->f_filter,
					fp6))
					/* filter matched */
					return (afp->f_class);

			/*
			 * check again for filters with a wildcard.
			 */
			if (i != ACC_WILDCARD_INDEX)
				i = ACC_WILDCARD_INDEX;
			else
				break;
		} while (1);
	}
#endif /* INET6 */

	/* no filter matched */
	return (NULL);
}

static int
apply_filter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_DADDR) &&
	    filt->ff_flow.fi_dst.s_addr !=
	    (pkt->fi_dst.s_addr & filt->ff_mask.mask_dst.s_addr))
		return (0);
	if ((fbmask & FIMB4_SADDR) &&
	    filt->ff_flow.fi_src.s_addr !=
	    (pkt->fi_src.s_addr & filt->ff_mask.mask_src.s_addr))
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	if ((fbmask & FIMB4_GPI) && filt->ff_flow.fi_gpi != (pkt->fi_gpi))
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function optimized for a common case that checks
 * only protocol and port numbers
 */
static int
apply_ppfilter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_SPORT) && filt->ff_flow.fi_sport != pkt->fi_sport)
		return (0);
	if ((fbmask & FIMB4_DPORT) && filt->ff_flow.fi_dport != pkt->fi_dport)
		return (0);
	if ((fbmask & FIMB4_PROTO) && filt->ff_flow.fi_proto != pkt->fi_proto)
		return (0);
	/* match */
	return (1);
}

/*
 * filter matching function only for tos field.
 */
static int
apply_tosfilter4(u_int32_t fbmask, struct flow_filter *filt,
    struct flowinfo_in *pkt)
{
	if (filt->ff_flow.fi_family != AF_INET)
		return (0);
	if ((fbmask & FIMB4_TOS) && filt->ff_flow.fi_tos !=
	    (pkt->fi_tos & filt->ff_mask.mask_tos))
		return (0);
	/* match */
	return (1);
}

#ifdef INET6
static int
apply_filter6(u_int32_t fbmask, struct flow_filter6 *filt,
    struct flowinfo_in6 *pkt)
{
	int i;

	if (filt->ff_flow6.fi6_family != AF_INET6)
		return (0);
	if ((fbmask & FIMB6_FLABEL) &&
	    filt->ff_flow6.fi6_flowlabel != pkt->fi6_flowlabel)
		return (0);
	if ((fbmask & FIMB6_PROTO) &&
	    filt->ff_flow6.fi6_proto != pkt->fi6_proto)
		return (0);
	if ((fbmask & FIMB6_SPORT) &&
	    filt->ff_flow6.fi6_sport != pkt->fi6_sport)
		return (0);
	if ((fbmask & FIMB6_DPORT) &&
	    filt->ff_flow6.fi6_dport != pkt->fi6_dport)
		return (0);
	if (fbmask & FIMB6_SADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_src.s6_addr32[i] !=
			    (pkt->fi6_src.s6_addr32[i] &
			     filt->ff_mask6.mask6_src.s6_addr32[i]))
				return (0);
	}
	if (fbmask & FIMB6_DADDR) {
		for (i = 0; i < 4; i++)
			if (filt->ff_flow6.fi6_dst.s6_addr32[i] !=
			    (pkt->fi6_dst.s6_addr32[i] &
			     filt->ff_mask6.mask6_dst.s6_addr32[i]))
				return (0);
	}
	if ((fbmask & FIMB6_TCLASS) &&
	    filt->ff_flow6.fi6_tclass !=
	    (pkt->fi6_tclass & filt->ff_mask6.mask6_tclass))
		return (0);
	if ((fbmask & FIMB6_GPI) &&
	    filt->ff_flow6.fi6_gpi != pkt->fi6_gpi)
		return (0);
	/* match */
	return (1);
}
#endif /* INET6 */

/*
 * filter handle:
 *	bit 20-28: index to the filter hash table
 *	bit  0-19: unique id in the hash bucket.
 */
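/*
 * illustrative example (hypothetical values): a filter stored in hash
 * bucket 3 with unique id 0x00042 gets handle
 *	(3 << 20) | 0x00042 == 0x00300042,
 * and ACC_GET_HINDEX() recovers the bucket index from the upper bits.
 */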
static u_long
get_filt_handle(struct acc_classifier *classifier, int i)
{
	static u_long handle_number = 1;
	u_long	handle;
	struct acc_filter *afp;

	while (1) {
		handle = handle_number++ & 0x000fffff;

		if (LIST_EMPTY(&classifier->acc_filters[i]))
			break;

		LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
			if ((afp->f_handle & 0x000fffff) == handle)
				break;
		if (afp == NULL)
			break;
		/* this handle is already used, try again */
	}

	return ((i << 20) | handle);
}

/* convert filter handle to filter pointer */
static struct acc_filter *
filth_to_filtp(struct acc_classifier *classifier, u_long handle)
{
	struct acc_filter *afp;
	int	i;

	i = ACC_GET_HINDEX(handle);

	LIST_FOREACH(afp, &classifier->acc_filters[i], f_chain)
		if (afp->f_handle == handle)
			return (afp);

	return (NULL);
}

/* create flowinfo bitmask */
static u_int32_t
filt2fibmask(struct flow_filter *filt)
{
	u_int32_t mask = 0;
#ifdef INET6
	struct flow_filter6 *filt6;
#endif

	switch (filt->ff_flow.fi_family) {
	case AF_INET:
		if (filt->ff_flow.fi_proto != 0)
			mask |= FIMB4_PROTO;
		if (filt->ff_flow.fi_tos != 0)
			mask |= FIMB4_TOS;
		if (filt->ff_flow.fi_dst.s_addr != 0)
			mask |= FIMB4_DADDR;
		if (filt->ff_flow.fi_src.s_addr != 0)
			mask |= FIMB4_SADDR;
		if (filt->ff_flow.fi_sport != 0)
			mask |= FIMB4_SPORT;
		if (filt->ff_flow.fi_dport != 0)
			mask |= FIMB4_DPORT;
		if (filt->ff_flow.fi_gpi != 0)
			mask |= FIMB4_GPI;
		break;
#ifdef INET6
	case AF_INET6:
		filt6 = (struct flow_filter6 *)filt;

		if (filt6->ff_flow6.fi6_proto != 0)
			mask |= FIMB6_PROTO;
		if (filt6->ff_flow6.fi6_tclass != 0)
			mask |= FIMB6_TCLASS;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_dst))
			mask |= FIMB6_DADDR;
		if (!IN6_IS_ADDR_UNSPECIFIED(&filt6->ff_flow6.fi6_src))
			mask |= FIMB6_SADDR;
		if (filt6->ff_flow6.fi6_sport != 0)
			mask |= FIMB6_SPORT;
		if (filt6->ff_flow6.fi6_dport != 0)
			mask |= FIMB6_DPORT;
		if (filt6->ff_flow6.fi6_gpi != 0)
			mask |= FIMB6_GPI;
		if (filt6->ff_flow6.fi6_flowlabel != 0)
			mask |= FIMB6_FLABEL;
		break;
#endif /* INET6 */
	}
	return (mask);
}


/*
 * helper functions to handle IPv4 fragments.
 * currently only in-sequence fragments are handled.
 *	- fragment info is cached in an LRU list.
 *	- when a first fragment is found, cache its flow info.
 *	- when a non-first fragment is found, lookup the cache.
 */

struct ip4_frag {
	TAILQ_ENTRY(ip4_frag) ip4f_chain;
	char    ip4f_valid;
	u_short ip4f_id;
	struct flowinfo_in ip4f_info;
};

static TAILQ_HEAD(ip4f_list, ip4_frag) ip4f_list; /* IPv4 fragment cache */

#define	IP4F_TABSIZE		16	/* IPv4 fragment cache size */
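
/*
 * the list doubles as the LRU order: ip4f_alloc() reclaims the
 * least-recently-used entry from the tail and moves it to the head,
 * while ip4f_free() invalidates an entry and parks it back at the
 * tail for reuse.
 */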

static void
ip4f_cache(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	if (TAILQ_EMPTY(&ip4f_list)) {
		/* first time call, allocate fragment cache entries. */
		if (ip4f_init() < 0)
			/* allocation failed! */
			return;
	}

	fp = ip4f_alloc();
	fp->ip4f_id = ip->ip_id;
	fp->ip4f_info.fi_proto = ip->ip_p;
	fp->ip4f_info.fi_src.s_addr = ip->ip_src.s_addr;
	fp->ip4f_info.fi_dst.s_addr = ip->ip_dst.s_addr;

	/* save port numbers */
	fp->ip4f_info.fi_sport = fin->fi_sport;
	fp->ip4f_info.fi_dport = fin->fi_dport;
	fp->ip4f_info.fi_gpi   = fin->fi_gpi;
}

static int
ip4f_lookup(struct ip *ip, struct flowinfo_in *fin)
{
	struct ip4_frag *fp;

	for (fp = TAILQ_FIRST(&ip4f_list); fp != NULL && fp->ip4f_valid;
	     fp = TAILQ_NEXT(fp, ip4f_chain))
		if (ip->ip_id == fp->ip4f_id &&
		    ip->ip_src.s_addr == fp->ip4f_info.fi_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ip4f_info.fi_dst.s_addr &&
		    ip->ip_p == fp->ip4f_info.fi_proto) {

			/* found the matching entry */
			fin->fi_sport = fp->ip4f_info.fi_sport;
			fin->fi_dport = fp->ip4f_info.fi_dport;
			fin->fi_gpi   = fp->ip4f_info.fi_gpi;

			if ((ntohs(ip->ip_off) & IP_MF) == 0)
				/* this is the last fragment,
				   release the entry. */
				ip4f_free(fp);

			return (1);
		}

	/* no matching entry found */
	return (0);
}

static int
ip4f_init(void)
{
	struct ip4_frag *fp;
	int i;

	TAILQ_INIT(&ip4f_list);
	for (i = 0; i < IP4F_TABSIZE; i++) {
		fp = malloc(sizeof(struct ip4_frag), M_DEVBUF, M_NOWAIT);
		if (fp == NULL) {
			printf("ip4f_init: can't alloc %dth entry!\n", i);
			if (i == 0)
				return (-1);
			return (0);
		}
		fp->ip4f_valid = 0;
		TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
	}
	return (0);
}

static struct ip4_frag *
ip4f_alloc(void)
{
	struct ip4_frag *fp;

	/* reclaim an entry at the tail, put it at the head */
	fp = TAILQ_LAST(&ip4f_list, ip4f_list);
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 1;
	TAILQ_INSERT_HEAD(&ip4f_list, fp, ip4f_chain);
	return (fp);
}

static void
ip4f_free(struct ip4_frag *fp)
{
	TAILQ_REMOVE(&ip4f_list, fp, ip4f_chain);
	fp->ip4f_valid = 0;
	TAILQ_INSERT_TAIL(&ip4f_list, fp, ip4f_chain);
}

#endif /* ALTQ3_CLFIER_COMPAT */